jupyter_protocol/media/
mod.rs

1//! Provides types and utilities for working with rich media content in Jupyter messages.
2//!
3//! This module defines the `Media` and `MediaType` structures, which represent
4//! MIME bundles in Jupyter messages. These are used for rich content display
5//! in notebooks and other Jupyter frontends.
6//!
7//! The main types in this module are:
8//!
9//! - [`Media`]: Represents a collection of media types.
10//! - [`MediaType`]: An enum representing various MIME types.
11//!
12//! # Examples
13//!
14//! Creating a media bundle with multiple types:
15//!
16//! ```rust
17//! use jupyter_protocol::media::{Media, MediaType};
18//!
19//! let media = Media::new(vec![
20//!     MediaType::Plain("Hello, world!".to_string()),
21//!     MediaType::Html("<h1>Hello, world!</h1>".to_string()),
22//! ]);
23//! ```
24//!
25//! Finding the richest media type:
26//!
27//! ```rust
28//! use jupyter_protocol::media::{Media, MediaType};
29//!
30//! let media = Media::new(vec![
31//!     MediaType::Plain("Hello, world!".to_string()),
32//!     MediaType::Html("<h1>Hello, world!</h1>".to_string()),
33//!     MediaType::Markdown("**Hello, world!**".to_string()),
34//! ]);
35//!
36//! let richest = media.richest(|media_type| match media_type {
37//!     MediaType::Html(_) => 3,
38//!     MediaType::Markdown(_) => 2,
39//!     MediaType::Plain(_) => 1,
40//!     _ => 0,
41//! });
42//!
43//! assert!(matches!(richest, Some(MediaType::Html(_))));
44//! ```
45use serde::{de, Deserialize, Serialize};
46use serde_json::Value;
47use std::collections::HashMap;
48
49pub mod datatable;
50
51pub use datatable::TabularDataResource;
52
/// A JSON object (string keys mapped to arbitrary JSON values), used as the payload of
/// the JSON-based [`MediaType`] variants.
pub type JsonObject = serde_json::Map<String, serde_json::Value>;
54
/// An enumeration representing various Media types, otherwise known as MIME (Multipurpose Internet Mail Extensions) types.
/// These types are used to indicate the nature of the data in a rich content message such as `DisplayData`, `UpdateDisplayData`, and `ExecuteResult`.
///
/// The derived serde representation is adjacently tagged: the MIME type string goes in a
/// `type` field and the payload in a `data` field. [`Media`] uses custom (de)serializers to
/// translate between that shape and the flat MIME bundle used by Jupyter.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "type", content = "data")]
pub enum MediaType {
    /// Plain text, typically representing unformatted text. (e.g. Python's `__repr__` or `_repr_pretty_` methods).
    #[serde(rename = "text/plain")]
    Plain(String),
    /// HTML, (as displayed via Python's `_repr_html_` method).
    #[serde(rename = "text/html")]
    Html(String),
    /// LaTeX, (as displayed using Python's `_repr_latex_` method).
    #[serde(rename = "text/latex")]
    Latex(String),
    /// Raw JavaScript code.
    #[serde(rename = "application/javascript")]
    Javascript(String),
    /// Markdown text, (as displayed using Python's `_repr_markdown_` method).
    #[serde(rename = "text/markdown")]
    Markdown(String),

    /// SVG image text, (as displayed using Python's `_repr_svg_` method).
    #[serde(rename = "image/svg+xml")]
    Svg(String),

    // Image data is all base64 encoded. These variants could all accept <Vec<u8>> as the
    // data. However, not all users of this library will need immediate decoding of the data.
    /// PNG image data (base64-encoded text).
    #[serde(rename = "image/png")]
    Png(String),
    /// JPEG image data (base64-encoded text).
    #[serde(rename = "image/jpeg")]
    Jpeg(String),
    /// GIF image data (base64-encoded text).
    #[serde(rename = "image/gif")]
    Gif(String),

    /// Raw JSON Object
    #[serde(rename = "application/json")]
    Json(JsonObject),

    /// GeoJSON data, a format for encoding a variety of geographic data structures.
    #[serde(rename = "application/geo+json")]
    GeoJson(JsonObject),
    /// Data table in JSON format, requires both a `data` and `schema`.
    /// Example: `{data: [{'ghost': true, 'says': "boo"}], schema: {fields: [{name: 'ghost', type: 'boolean'}, {name: 'says', type: 'string'}]}}`.
    #[serde(rename = "application/vnd.dataresource+json")]
    DataTable(Box<TabularDataResource>),
    /// Plotly JSON Schema for rendering graphs and charts.
    #[serde(rename = "application/vnd.plotly.v1+json")]
    Plotly(JsonObject),
    /// Jupyter/IPython widget view in JSON format.
    #[serde(rename = "application/vnd.jupyter.widget-view+json")]
    WidgetView(JsonObject),
    /// Jupyter/IPython widget state in JSON format.
    #[serde(rename = "application/vnd.jupyter.widget-state+json")]
    WidgetState(JsonObject),
    /// VegaLite data in JSON format for version 2 visualizations.
    #[serde(rename = "application/vnd.vegalite.v2+json")]
    VegaLiteV2(JsonObject),
    /// VegaLite data in JSON format for version 3 visualizations.
    #[serde(rename = "application/vnd.vegalite.v3+json")]
    VegaLiteV3(JsonObject),
    /// VegaLite data in JSON format for version 4 visualizations.
    #[serde(rename = "application/vnd.vegalite.v4+json")]
    VegaLiteV4(JsonObject),
    /// VegaLite data in JSON format for version 5 visualizations.
    #[serde(rename = "application/vnd.vegalite.v5+json")]
    VegaLiteV5(JsonObject),
    /// VegaLite data in JSON format for version 6 visualizations.
    #[serde(rename = "application/vnd.vegalite.v6+json")]
    VegaLiteV6(JsonObject),
    /// Vega data in JSON format for version 3 visualizations.
    #[serde(rename = "application/vnd.vega.v3+json")]
    VegaV3(JsonObject),
    /// Vega data in JSON format for version 4 visualizations.
    #[serde(rename = "application/vnd.vega.v4+json")]
    VegaV4(JsonObject),
    /// Vega data in JSON format for version 5 visualizations.
    #[serde(rename = "application/vnd.vega.v5+json")]
    VegaV5(JsonObject),

    /// Represents Virtual DOM (nteract/vdom) data in JSON format.
    #[serde(rename = "application/vdom.v1+json")]
    Vdom(JsonObject),

    /// Catch-all for entries without a usable dedicated variant: the raw MIME type string
    /// paired with its untouched JSON value.
    // This variant has no `rename`, so the derived serde impls tag it as `other`
    // (via `rename_all = "snake_case"`) rather than by its MIME type.
    // TODO: Implement a custom deserializer so this extra type isn't in resulting serializations.
    Other((String, Value)),
}
146
147impl std::hash::Hash for MediaType {
148    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
149        match &self {
150            MediaType::Plain(_) => "text/plain",
151            MediaType::Html(_) => "text/html",
152            MediaType::Latex(_) => "text/latex",
153            MediaType::Javascript(_) => "application/javascript",
154            MediaType::Markdown(_) => "text/markdown",
155            MediaType::Svg(_) => "image/svg+xml",
156            MediaType::Png(_) => "image/png",
157            MediaType::Jpeg(_) => "image/jpeg",
158            MediaType::Gif(_) => "image/gif",
159            MediaType::Json(_) => "application/json",
160            MediaType::GeoJson(_) => "application/geo+json",
161            MediaType::DataTable(_) => "application/vnd.dataresource+json",
162            MediaType::Plotly(_) => "application/vnd.plotly.v1+json",
163            MediaType::WidgetView(_) => "application/vnd.jupyter.widget-view+json",
164            MediaType::WidgetState(_) => "application/vnd.jupyter.widget-state+json",
165            MediaType::VegaLiteV2(_) => "application/vnd.vegalite.v2+json",
166            MediaType::VegaLiteV3(_) => "application/vnd.vegalite.v3+json",
167            MediaType::VegaLiteV4(_) => "application/vnd.vegalite.v4+json",
168            MediaType::VegaLiteV5(_) => "application/vnd.vegalite.v5+json",
169            MediaType::VegaLiteV6(_) => "application/vnd.vegalite.v6+json",
170            MediaType::VegaV3(_) => "application/vnd.vega.v3+json",
171            MediaType::VegaV4(_) => "application/vnd.vega.v4+json",
172            MediaType::VegaV5(_) => "application/vnd.vega.v5+json",
173            MediaType::Vdom(_) => "application/vdom.v1+json",
174            MediaType::Other((key, _)) => key.as_str(),
175        }
176        .hash(state)
177    }
178}
179
/// A `Media` is a collection of data associated with different Media types.
/// It allows for the representation of rich content that can be displayed in multiple formats.
/// These are found in the `data` field of `DisplayData` and `ExecuteResult` messages/output types.
///
/// The custom (de)serializers on `content` translate between a flat JSON object keyed by
/// MIME type and the typed [`MediaType`] variants.
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
pub struct Media {
    /// The typed entries of the bundle, one [`MediaType`] value per entry.
    ///
    /// The field is flattened, so the entries (de)serialize as keys of the enclosing JSON
    /// object rather than under a `content` key.
    #[serde(
        flatten,
        deserialize_with = "deserialize_media",
        serialize_with = "serialize_media_for_wire"
    )]
    pub content: Vec<MediaType>,
}
194
195fn deserialize_media<'de, D>(deserializer: D) -> Result<Vec<MediaType>, D::Error>
196where
197    D: serde::Deserializer<'de>,
198{
199    // Jupyter protocol does pure Map<String, Value> for media types.
200    // Our deserializer goes a step further by having enums that have their data fully typed
201    let map: HashMap<String, Value> = HashMap::deserialize(deserializer)?;
202    let mut content = Vec::new();
203
204    for (key, value) in map {
205        // Check if the key matches ^application/(.*\\+)?json$ in order to skip the multiline string handling
206        if key.starts_with("application/") && key.ends_with("json") {
207            let media_type =
208                match serde_json::from_value(Value::Object(serde_json::Map::from_iter([
209                    ("type".to_string(), Value::String(key.clone())),
210                    ("data".to_string(), value.clone()),
211                ]))) {
212                    Ok(mediatype) => mediatype,
213                    Err(_) => MediaType::Other((key, value)),
214                };
215            content.push(media_type);
216            continue;
217        }
218
219        // Now we know we're getting a plain string or an array of strings
220        let text: String = match value.clone() {
221            Value::String(s) => s,
222            Value::Array(arr) => arr
223                .into_iter()
224                .filter_map(|v| v.as_str().map(String::from))
225                .collect::<Vec<String>>()
226                .join(""),
227            _ => return Err(de::Error::custom("Invalid value for text-based media type")),
228        };
229
230        if key.starts_with("image/") {
231            // If we ever want to turn this into Vec<u8> we could do that here. We would need to strip all the whitespace from the base64
232            // encoded image too though. `let text = text.replace("\n", "").replace(" ", "");`
233            // For consistency with other notebook frontends though, we'll keep it the same
234
235            let mediatype: MediaType = match key.as_str() {
236                "image/png" => MediaType::Png(text),
237                "image/jpeg" => MediaType::Jpeg(text),
238                "image/gif" => MediaType::Gif(text),
239                _ => MediaType::Other((key.clone(), value)),
240            };
241            content.push(mediatype);
242            continue;
243        }
244
245        let mediatype: MediaType = match key.as_str() {
246            "text/plain" => MediaType::Plain(text),
247            "text/html" => MediaType::Html(text),
248            "text/latex" => MediaType::Latex(text),
249            "application/javascript" => MediaType::Javascript(text),
250            "text/markdown" => MediaType::Markdown(text),
251            "image/svg+xml" => MediaType::Svg(text),
252
253            // Keep unknown mediatypes exactly as they were
254            _ => MediaType::Other((key.clone(), value)),
255        };
256
257        content.push(mediatype);
258    }
259
260    Ok(content)
261}
262
263pub fn serialize_media_for_wire<S>(
264    content: &Vec<MediaType>,
265    serializer: S,
266) -> Result<S::Ok, S::Error>
267where
268    S: serde::Serializer,
269{
270    serialize_media_with_options(content, serializer, false)
271}
272
273pub fn serialize_media_for_notebook<S>(media: &Media, serializer: S) -> Result<S::Ok, S::Error>
274where
275    S: serde::Serializer,
276{
277    serialize_media_with_options(&media.content, serializer, true)
278}
279
280pub fn serialize_media_with_options<S>(
281    content: &Vec<MediaType>,
282    serializer: S,
283    with_multiline: bool,
284) -> Result<S::Ok, S::Error>
285where
286    S: serde::Serializer,
287{
288    let mut map = HashMap::new();
289
290    for media_type in content {
291        let (key, value) = match media_type {
292            MediaType::Plain(text)
293            | MediaType::Html(text)
294            | MediaType::Latex(text)
295            | MediaType::Javascript(text)
296            | MediaType::Markdown(text)
297            | MediaType::Svg(text) => {
298                let key = match media_type {
299                    MediaType::Plain(_) => "text/plain",
300                    MediaType::Html(_) => "text/html",
301                    MediaType::Latex(_) => "text/latex",
302                    MediaType::Javascript(_) => "application/javascript",
303                    MediaType::Markdown(_) => "text/markdown",
304                    MediaType::Svg(_) => "image/svg+xml",
305                    _ => unreachable!(),
306                };
307                let value = if with_multiline {
308                    let lines: Vec<&str> = text.lines().collect();
309
310                    if lines.len() > 1 {
311                        let entries = lines
312                            .iter()
313                            .map(|line| Value::String(format!("{}\n", line)));
314
315                        Value::Array(entries.collect())
316                    } else {
317                        Value::Array(vec![Value::String(text.clone())])
318                    }
319                } else {
320                    Value::String(text.clone())
321                };
322                (key.to_string(), value)
323            }
324            // ** Treat images in a special way **
325            // Jupyter, in practice, will attempt to keep the multiline version of the image around if it was written in
326            // that way. We'd have to do extra tracking in order to keep this enum consistent, so this is an area
327            // where we may wish to diverge from practice (not protocol or schema, just practice).
328            //
329            // As an example, some frontends will convert images to base64 and then split them into 80 character chunks
330            // with newlines interspersed. We could perform the chunking but then in many cases we will no longer match.
331            MediaType::Jpeg(text) | MediaType::Png(text) | MediaType::Gif(text) => {
332                let key = match media_type {
333                    MediaType::Jpeg(_) => "image/jpeg",
334                    MediaType::Png(_) => "image/png",
335                    MediaType::Gif(_) => "image/gif",
336                    _ => unreachable!(),
337                };
338                let value = if with_multiline {
339                    let lines: Vec<&str> = text.lines().collect();
340
341                    if lines.len() > 1 {
342                        let entries = lines
343                            .iter()
344                            .map(|line| Value::String(format!("{}\n", line)));
345
346                        Value::Array(entries.collect())
347                    } else {
348                        Value::String(text.clone())
349                    }
350                } else {
351                    Value::String(text.clone())
352                };
353
354                (key.to_string(), value)
355            }
356            // Keep unknown media types as is
357            MediaType::Other((key, value)) => (key.clone(), value.clone()),
358            _ => {
359                let serialized =
360                    serde_json::to_value(media_type).map_err(serde::ser::Error::custom)?;
361                if let Value::Object(obj) = serialized {
362                    if let (Some(Value::String(key)), Some(data)) =
363                        (obj.get("type"), obj.get("data"))
364                    {
365                        (key.clone(), data.clone())
366                    } else {
367                        continue;
368                    }
369                } else {
370                    continue;
371                }
372            }
373        };
374        map.insert(key, value);
375    }
376
377    map.serialize(serializer)
378}
379
380impl Media {
381    /// Find the richest media type in the bundle, based on the provided ranker function.
382    /// A rank of 0 indicates that the media type is not supported. Higher numbers indicate
383    /// that the media type is preferred over other media types.
384    ///
385    /// # Examples
386    ///
387    /// ```rust
388    /// use jupyter_protocol::media::{Media, MediaType};
389    ///
390    /// let raw = r#"{
391    ///    "text/plain": "FancyThing()",
392    ///    "text/html": "<h1>Fancy!</h1>",
393    ///    "application/json": {"fancy": true}
394    /// }"#;
395    ///
396    /// let media: Media = serde_json::from_str(raw).unwrap();
397    ///
398    /// let ranker = |media_type: &MediaType| match media_type {
399    ///    MediaType::Html(_) => 3,
400    ///    MediaType::Json(_) => 2,
401    ///    MediaType::Plain(_) => 1,
402    ///    _ => 0,
403    /// };
404    ///
405    /// let richest = media.richest(ranker);
406    ///
407    /// assert_eq!(
408    ///    richest,
409    ///    Some(MediaType::Html(String::from("<h1>Fancy!</h1>"))).as_ref()
410    /// );
411    ///
412    /// ```
413    pub fn richest(&self, ranker: fn(&MediaType) -> usize) -> Option<&MediaType> {
414        self.content
415            .iter()
416            .filter_map(|mediatype| {
417                let rank = ranker(mediatype);
418                if rank > 0 {
419                    Some((rank, mediatype))
420                } else {
421                    None
422                }
423            })
424            .max_by_key(|(rank, _)| *rank)
425            .map(|(_, mediatype)| mediatype)
426    }
427
428    pub fn new(content: Vec<MediaType>) -> Self {
429        Self { content }
430    }
431}
432
433impl From<MediaType> for Media {
434    fn from(media_type: MediaType) -> Self {
435        Media {
436            content: vec![media_type],
437        }
438    }
439}
440
441impl From<Vec<MediaType>> for Media {
442    fn from(content: Vec<MediaType>) -> Self {
443        Media { content }
444    }
445}
446
// Backwards compatibility with previous versions and Jupyter naming
/// Alias of [`Media`], kept for backwards compatibility and Jupyter's "MIME bundle" naming.
pub type MimeBundle = Media;
/// Alias of [`MediaType`], kept for backwards compatibility and Jupyter's "MIME type" naming.
pub type MimeType = MediaType;
450
#[cfg(test)]
mod test {
    use serde_json::json;

    use super::datatable::{FieldType, TableSchemaField};
    use super::*;

    /// Parses the fixture shared by several tests: plain text, HTML, JSON, a data table
    /// and one unknown `application/octet-stream` entry.
    fn sample_bundle() -> Media {
        let raw = r#"{
            "text/plain": "Hello, world!",
            "text/html": "<h1>Hello, world!</h1>",
            "application/json": {
                "name": "John Doe",
                "age": 30
            },
            "application/vnd.dataresource+json": {
                "data": [
                    {"name": "Alice", "age": 25},
                    {"name": "Bob", "age": 35}
                ],
                "schema": {
                    "fields": [
                        {"name": "name", "type": "string"},
                        {"name": "age", "type": "integer"}
                    ]
                }
            },
            "application/octet-stream": "Binary data"
        }"#;

        serde_json::from_str(raw).unwrap()
    }

    /// The highest-ranked supported type wins even when richer types exist in the bundle.
    #[test]
    fn richest_middle() {
        let bundle = sample_bundle();

        let ranker = |mediatype: &MediaType| match mediatype {
            MediaType::Plain(_) => 1,
            MediaType::Html(_) => 2,
            _ => 0,
        };

        if let Some(MediaType::Html(data)) = bundle.richest(ranker) {
            assert_eq!(data, "<h1>Hello, world!</h1>");
        } else {
            panic!("Unexpected media type");
        }
    }

    /// A data table entry is deserialized into its typed form and can be selected.
    #[test]
    fn find_table() {
        let bundle = sample_bundle();

        let ranker = |mediatype: &MediaType| match mediatype {
            MediaType::Html(_) => 1,
            MediaType::Json(_) => 2,
            MediaType::DataTable(_) => 3,
            _ => 0,
        };

        let table = match bundle.richest(ranker) {
            Some(MediaType::DataTable(table)) => table,
            _ => panic!("Unexpected mime type"),
        };

        let expected_rows = vec![
            json!({"name": "Alice", "age": 25}),
            json!({"name": "Bob", "age": 35}),
        ];
        assert_eq!(table.data, Some(expected_rows));

        let expected_fields = vec![
            TableSchemaField {
                name: "name".to_string(),
                field_type: FieldType::String,
                ..Default::default()
            },
            TableSchemaField {
                name: "age".to_string(),
                field_type: FieldType::Integer,
                ..Default::default()
            },
        ];
        assert_eq!(table.schema.fields, expected_fields);
    }

    /// Unknown types are never selected by a ranker that ignores them, but are preserved.
    #[test]
    fn find_nothing_and_be_happy() {
        let raw = r#"{
            "application/fancy": "Too ✨ Fancy ✨ for you!"
        }"#;

        let bundle: Media = serde_json::from_str(raw).unwrap();

        let ranker = |mediatype: &MediaType| match mediatype {
            MediaType::Html(_) => 1,
            MediaType::Json(_) => 2,
            MediaType::DataTable(_) => 3,
            _ => 0,
        };

        assert_eq!(bundle.richest(ranker), None);

        let preserved = MediaType::Other((
            "application/fancy".to_string(),
            json!("Too ✨ Fancy ✨ for you!"),
        ));
        assert!(bundle.content.contains(&preserved));
    }

    /// A ranker that supports nothing yields no result.
    #[test]
    fn no_media_type_supported() {
        let bundle = sample_bundle();
        assert_eq!(bundle.richest(|_| 0), None);
    }

    /// Text payloads given as arrays of strings are joined into a single string.
    #[test]
    fn ensure_array_of_text_processed() {
        let single_chunk = r#"{
            "text/plain": ["Hello, world!"],
            "text/html": "<h1>Hello, world!</h1>"
        }"#;

        let bundle: Media = serde_json::from_str(single_chunk).unwrap();

        assert_eq!(bundle.content.len(), 2);
        let plain = MediaType::Plain("Hello, world!".to_string());
        let html = MediaType::Html("<h1>Hello, world!</h1>".to_string());
        assert!(bundle.content.contains(&plain));
        assert!(bundle.content.contains(&html));

        let several_chunks = r#"{
            "text/plain": ["Hello, world!\n", "Welcome to zombo.com"],
            "text/html": ["<h1>\n", "  Hello, world!\n", "</h1>"]
        }"#;

        let bundle: Media = serde_json::from_str(several_chunks).unwrap();

        assert_eq!(bundle.content.len(), 2);
        let plain = MediaType::Plain("Hello, world!\nWelcome to zombo.com".to_string());
        let html = MediaType::Html("<h1>\n  Hello, world!\n</h1>".to_string());
        assert!(bundle.content.contains(&plain));
        assert!(bundle.content.contains(&html));
    }
}