jupyter_protocol/media/
mod.rs

//! Provides types and utilities for working with rich media content in Jupyter messages.
//!
//! This module defines the [`Media`] struct and the [`MediaType`] enum, which together
//! represent MIME bundles in Jupyter messages. These are used for rich content display
//! in notebooks and other Jupyter frontends.
6//!
7//! The main types in this module are:
8//!
9//! - [`Media`]: Represents a collection of media types.
10//! - [`MediaType`]: An enum representing various MIME types.
11//!
12//! # Examples
13//!
14//! Creating a media bundle with multiple types:
15//!
16//! ```rust
17//! use jupyter_protocol::media::{Media, MediaType};
18//!
19//! let media = Media::new(vec![
20//!     MediaType::Plain("Hello, world!".to_string()),
21//!     MediaType::Html("<h1>Hello, world!</h1>".to_string()),
22//! ]);
23//! ```
24//!
25//! Finding the richest media type:
26//!
27//! ```rust
28//! use jupyter_protocol::media::{Media, MediaType};
29//!
30//! let media = Media::new(vec![
31//!     MediaType::Plain("Hello, world!".to_string()),
32//!     MediaType::Html("<h1>Hello, world!</h1>".to_string()),
33//!     MediaType::Markdown("**Hello, world!**".to_string()),
34//! ]);
35//!
36//! let richest = media.richest(|media_type| match media_type {
37//!     MediaType::Html(_) => 3,
38//!     MediaType::Markdown(_) => 2,
39//!     MediaType::Plain(_) => 1,
40//!     _ => 0,
41//! });
42//!
43//! assert!(matches!(richest, Some(MediaType::Html(_))));
44//! ```
45use serde::{de, Deserialize, Serialize};
46use serde_json::Value;
47use std::collections::HashMap;
48
49pub mod datatable;
50
51pub use datatable::TabularDataResource;
52
53pub type JsonObject = serde_json::Map<String, serde_json::Value>;
54
55/// An enumeration representing various Media types, otherwise known as MIME (Multipurpose Internet Mail Extensions) types.
56/// These types are used to indicate the nature of the data in a rich content message such as `DisplayData`, `UpdateDisplayData`, and `ExecuteResult`.
57#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
58#[serde(rename_all = "snake_case")]
59#[serde(tag = "type", content = "data")]
60pub enum MediaType {
61    /// Plain text, typically representing unformatted text. (e.g. Python's `_repr_` or `_repr_pretty_` methods).
62    #[serde(rename = "text/plain")]
63    Plain(String),
64    /// HTML, (as displayed via Python's `_repr_html_` method).
65    #[serde(rename = "text/html")]
66    Html(String),
67    /// LaTeX, (as displayed using Python's `_repr_latex_` method).
68    #[serde(rename = "text/latex")]
69    Latex(String),
70    /// Raw JavaScript code.
71    #[serde(rename = "application/javascript")]
72    Javascript(String),
73    /// Markdown text, (as displayed using Python's `_repr_markdown_` method).
74    #[serde(rename = "text/markdown")]
75    Markdown(String),
76
77    /// SVG image text, (as displayed using Python's `_repr_svg_` method).
78    #[serde(rename = "image/svg+xml")]
79    Svg(String),
80
81    // Image data is all base64 encoded. These variants could all accept <Vec<u8>> as the
82    // data. However, not all users of this library will need immediate decoding of the data.
83    /// PNG image data.
84    #[serde(rename = "image/png")]
85    Png(String),
86    /// JPEG image data.
87    #[serde(rename = "image/jpeg")]
88    Jpeg(String),
89    /// GIF image data.
90    #[serde(rename = "image/gif")]
91    Gif(String),
92
93    /// Raw JSON Object
94    #[serde(rename = "application/json")]
95    Json(JsonObject),
96
97    /// GeoJSON data, a format for encoding a variety of geographic data structures.
98    #[serde(rename = "application/geo+json")]
99    GeoJson(JsonObject),
100    /// Data table in JSON format, requires both a `data` and `schema`.
101    /// Example: `{data: [{'ghost': true, 'says': "boo"}], schema: {fields: [{name: 'ghost', type: 'boolean'}, {name: 'says', type: 'string'}]}}`.
102    #[serde(rename = "application/vnd.dataresource+json")]
103    DataTable(Box<TabularDataResource>),
104    /// Plotly JSON Schema for for rendering graphs and charts.
105    #[serde(rename = "application/vnd.plotly.v1+json")]
106    Plotly(JsonObject),
107    /// Jupyter/IPython widget view in JSON format.
108    #[serde(rename = "application/vnd.jupyter.widget-view+json")]
109    WidgetView(JsonObject),
110    /// Jupyter/IPython widget state in JSON format.
111    #[serde(rename = "application/vnd.jupyter.widget-state+json")]
112    WidgetState(JsonObject),
113    /// VegaLite data in JSON format for version 2 visualizations.
114    #[serde(rename = "application/vnd.vegalite.v2+json")]
115    VegaLiteV2(JsonObject),
116    /// VegaLite data in JSON format for version 3 visualizations.
117    #[serde(rename = "application/vnd.vegalite.v3+json")]
118    VegaLiteV3(JsonObject),
119    /// VegaLite data in JSON format for version 4 visualizations.
120    #[serde(rename = "application/vnd.vegalite.v4+json")]
121    VegaLiteV4(JsonObject),
122    /// VegaLite data in JSON format for version 5 visualizations.
123    #[serde(rename = "application/vnd.vegalite.v5+json")]
124    VegaLiteV5(JsonObject),
125    /// VegaLite data in JSON format for version 6 visualizations.
126    #[serde(rename = "application/vnd.vegalite.v6+json")]
127    VegaLiteV6(JsonObject),
128    /// Vega data in JSON format for version 3 visualizations.
129    #[serde(rename = "application/vnd.vega.v3+json")]
130    VegaV3(JsonObject),
131    /// Vega data in JSON format for version 4 visualizations.
132    #[serde(rename = "application/vnd.vega.v4+json")]
133    VegaV4(JsonObject),
134    /// Vega data in JSON format for version 5 visualizations.
135    #[serde(rename = "application/vnd.vega.v5+json")]
136    VegaV5(JsonObject),
137
138    /// Represents Virtual DOM (nteract/vdom) data in JSON format.
139    #[serde(rename = "application/vdom.v1+json")]
140    Vdom(JsonObject),
141
142    // Catch all type for serde ease.
143    // TODO: Implement a custom deserializer so this extra type isn't in resulting serializations.
144    Other((String, Value)),
145}
146
147impl std::hash::Hash for MediaType {
148    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
149        match &self {
150            MediaType::Plain(_) => "text/plain",
151            MediaType::Html(_) => "text/html",
152            MediaType::Latex(_) => "text/latex",
153            MediaType::Javascript(_) => "application/javascript",
154            MediaType::Markdown(_) => "text/markdown",
155            MediaType::Svg(_) => "image/svg+xml",
156            MediaType::Png(_) => "image/png",
157            MediaType::Jpeg(_) => "image/jpeg",
158            MediaType::Gif(_) => "image/gif",
159            MediaType::Json(_) => "application/json",
160            MediaType::GeoJson(_) => "application/geo+json",
161            MediaType::DataTable(_) => "application/vnd.dataresource+json",
162            MediaType::Plotly(_) => "application/vnd.plotly.v1+json",
163            MediaType::WidgetView(_) => "application/vnd.jupyter.widget-view+json",
164            MediaType::WidgetState(_) => "application/vnd.jupyter.widget-state+json",
165            MediaType::VegaLiteV2(_) => "application/vnd.vegalite.v2+json",
166            MediaType::VegaLiteV3(_) => "application/vnd.vegalite.v3+json",
167            MediaType::VegaLiteV4(_) => "application/vnd.vegalite.v4+json",
168            MediaType::VegaLiteV5(_) => "application/vnd.vegalite.v5+json",
169            MediaType::VegaLiteV6(_) => "application/vnd.vegalite.v6+json",
170            MediaType::VegaV3(_) => "application/vnd.vega.v3+json",
171            MediaType::VegaV4(_) => "application/vnd.vega.v4+json",
172            MediaType::VegaV5(_) => "application/vnd.vega.v5+json",
173            MediaType::Vdom(_) => "application/vdom.v1+json",
174            MediaType::Other((key, _)) => key.as_str(),
175        }
176        .hash(state)
177    }
178}
179
180/// A `Media` is a collection of data associated with different Media types.
181/// It allows for the representation of rich content that can be displayed in multiple formats.
182/// These are found in the `data` field of a `DisplayData` and `ExecuteResult` messages/output types.
183///
184#[derive(Default, Serialize, Deserialize, Debug, Clone)]
185pub struct Media {
186    /// A map of Media types to their corresponding data, represented as JSON `Value`.
187    #[serde(
188        flatten,
189        deserialize_with = "deserialize_media",
190        serialize_with = "serialize_media_for_wire"
191    )]
192    pub content: Vec<MediaType>,
193}
194
195fn deserialize_media<'de, D>(deserializer: D) -> Result<Vec<MediaType>, D::Error>
196where
197    D: serde::Deserializer<'de>,
198{
199    // Jupyter protocol does pure Map<String, Value> for media types.
200    // Our deserializer goes a step further by having enums that have their data fully typed
201    let map: HashMap<String, Value> = HashMap::deserialize(deserializer)?;
202    let mut content = Vec::new();
203
204    for (key, value) in map {
205        // Check if the key matches ^application/(.*\\+)?json$ in order to skip the multiline string handling
206        if key.starts_with("application/") && key.ends_with("json") {
207            let media_type =
208                match serde_json::from_value(Value::Object(serde_json::Map::from_iter([
209                    ("type".to_string(), Value::String(key.clone())),
210                    ("data".to_string(), value.clone()),
211                ]))) {
212                    Ok(mediatype) => mediatype,
213                    Err(_) => MediaType::Other((key, value)),
214                };
215            content.push(media_type);
216            continue;
217        }
218
219        // Now we know we're getting a plain string or an array of strings
220        let text: String = match value.clone() {
221            Value::String(s) => s,
222            Value::Array(arr) => arr
223                .into_iter()
224                .filter_map(|v| v.as_str().map(String::from))
225                .collect::<Vec<String>>()
226                .join(""),
227            _ => return Err(de::Error::custom("Invalid value for text-based media type")),
228        };
229
230        if key.starts_with("image/") && !key.starts_with("image/svg+xml") {
231            // If we ever want to turn this into Vec<u8> we could do that here. We would need to strip all the whitespace from the base64
232            // encoded image too though. `let text = text.replace("\n", "").replace(" ", "");`
233            // For consistency with other notebook frontends though, we'll keep it the same
234
235            let mediatype: MediaType = match key.as_str() {
236                "image/png" => MediaType::Png(text),
237                "image/jpeg" => MediaType::Jpeg(text),
238                "image/gif" => MediaType::Gif(text),
239                _ => MediaType::Other((key.clone(), value)),
240            };
241            content.push(mediatype);
242            continue;
243        }
244
245        let mediatype: MediaType = match key.as_str() {
246            "text/plain" => MediaType::Plain(text),
247            "text/html" => MediaType::Html(text),
248            "text/latex" => MediaType::Latex(text),
249            "application/javascript" => MediaType::Javascript(text),
250            "text/markdown" => MediaType::Markdown(text),
251            "image/svg+xml" => MediaType::Svg(text),
252
253            // Keep unknown mediatypes exactly as they were
254            _ => MediaType::Other((key.clone(), value)),
255        };
256
257        content.push(mediatype);
258    }
259
260    Ok(content)
261}
262
263pub fn serialize_media_for_wire<S>(
264    content: &Vec<MediaType>,
265    serializer: S,
266) -> Result<S::Ok, S::Error>
267where
268    S: serde::Serializer,
269{
270    serialize_media_with_options(content, serializer, false)
271}
272
273pub fn serialize_media_for_notebook<S>(media: &Media, serializer: S) -> Result<S::Ok, S::Error>
274where
275    S: serde::Serializer,
276{
277    serialize_media_with_options(&media.content, serializer, true)
278}
279
280pub fn serialize_media_with_options<S>(
281    content: &Vec<MediaType>,
282    serializer: S,
283    with_multiline: bool,
284) -> Result<S::Ok, S::Error>
285where
286    S: serde::Serializer,
287{
288    let mut map = HashMap::new();
289
290    for media_type in content {
291        let (key, value) = match media_type {
292            MediaType::Plain(text)
293            | MediaType::Html(text)
294            | MediaType::Latex(text)
295            | MediaType::Javascript(text)
296            | MediaType::Markdown(text)
297            | MediaType::Svg(text) => {
298                let key = match media_type {
299                    MediaType::Plain(_) => "text/plain",
300                    MediaType::Html(_) => "text/html",
301                    MediaType::Latex(_) => "text/latex",
302                    MediaType::Javascript(_) => "application/javascript",
303                    MediaType::Markdown(_) => "text/markdown",
304                    MediaType::Svg(_) => "image/svg+xml",
305                    _ => unreachable!(),
306                };
307                let value = if with_multiline {
308                    let lines: Vec<&str> = text.lines().collect();
309
310                    if lines.len() > 1 {
311                        let entries = lines
312                            .iter()
313                            .map(|line| Value::String(format!("{}\n", line)));
314
315                        Value::Array(entries.collect())
316                    } else {
317                        Value::Array(vec![Value::String(text.clone())])
318                    }
319                } else {
320                    Value::String(text.clone())
321                };
322                (key.to_string(), value)
323            }
324            // ** Treat images in a special way **
325            // Jupyter, in practice, will attempt to keep the multiline version of the image around if it was written in
326            // that way. We'd have to do extra tracking in order to keep this enum consistent, so this is an area
327            // where we may wish to diverge from practice (not protocol or schema, just practice).
328            //
329            // As an example, some frontends will convert images to base64 and then split them into 80 character chunks
330            // with newlines interspersed. We could perform the chunking but then in many cases we will no longer match.
331            MediaType::Jpeg(text) | MediaType::Png(text) | MediaType::Gif(text) => {
332                let key = match media_type {
333                    MediaType::Jpeg(_) => "image/jpeg",
334                    MediaType::Png(_) => "image/png",
335                    MediaType::Gif(_) => "image/gif",
336                    _ => unreachable!(),
337                };
338                let value = if with_multiline {
339                    let lines: Vec<&str> = text.lines().collect();
340
341                    if lines.len() > 1 {
342                        let entries = lines
343                            .iter()
344                            .map(|line| Value::String(format!("{}\n", line)));
345
346                        Value::Array(entries.collect())
347                    } else {
348                        Value::String(text.clone())
349                    }
350                } else {
351                    Value::String(text.clone())
352                };
353
354                (key.to_string(), value)
355            }
356            // Keep unknown media types as is
357            MediaType::Other((key, value)) => (key.clone(), value.clone()),
358            _ => {
359                let serialized =
360                    serde_json::to_value(media_type).map_err(serde::ser::Error::custom)?;
361                if let Value::Object(obj) = serialized {
362                    if let (Some(Value::String(key)), Some(data)) =
363                        (obj.get("type"), obj.get("data"))
364                    {
365                        (key.clone(), data.clone())
366                    } else {
367                        continue;
368                    }
369                } else {
370                    continue;
371                }
372            }
373        };
374        map.insert(key, value);
375    }
376
377    map.serialize(serializer)
378}
379
380impl Media {
381    /// Find the richest media type in the bundle, based on the provided ranker function.
382    /// A rank of 0 indicates that the media type is not supported. Higher numbers indicate
383    /// that the media type is preferred over other media types.
384    ///
385    /// # Examples
386    ///
387    /// ```rust
388    /// use jupyter_protocol::media::{Media, MediaType};
389    ///
390    /// let raw = r#"{
391    ///    "text/plain": "FancyThing()",
392    ///    "text/html": "<h1>Fancy!</h1>",
393    ///    "application/json": {"fancy": true}
394    /// }"#;
395    ///
396    /// let media: Media = serde_json::from_str(raw).unwrap();
397    ///
398    /// let ranker = |media_type: &MediaType| match media_type {
399    ///    MediaType::Html(_) => 3,
400    ///    MediaType::Json(_) => 2,
401    ///    MediaType::Plain(_) => 1,
402    ///    _ => 0,
403    /// };
404    ///
405    /// let richest = media.richest(ranker);
406    ///
407    /// assert_eq!(
408    ///    richest,
409    ///    Some(MediaType::Html(String::from("<h1>Fancy!</h1>"))).as_ref()
410    /// );
411    ///
412    /// ```
413    pub fn richest(&self, ranker: fn(&MediaType) -> usize) -> Option<&MediaType> {
414        self.content
415            .iter()
416            .filter_map(|mediatype| {
417                let rank = ranker(mediatype);
418                if rank > 0 {
419                    Some((rank, mediatype))
420                } else {
421                    None
422                }
423            })
424            .max_by_key(|(rank, _)| *rank)
425            .map(|(_, mediatype)| mediatype)
426    }
427
428    pub fn new(content: Vec<MediaType>) -> Self {
429        Self { content }
430    }
431}
432
433impl From<MediaType> for Media {
434    fn from(media_type: MediaType) -> Self {
435        Media {
436            content: vec![media_type],
437        }
438    }
439}
440
441impl From<Vec<MediaType>> for Media {
442    fn from(content: Vec<MediaType>) -> Self {
443        Media { content }
444    }
445}
446
447// Backwards compatibility with previous versions and Jupyter naming
448pub type MimeBundle = Media;
449pub type MimeType = MediaType;
450
#[cfg(test)]
mod test {
    use datatable::TableSchemaField;
    use serde_json::json;

    use super::*;

    /// A bundle mixing text, JSON, tabular, and unrecognized entries.
    const MIXED_BUNDLE: &str = r#"{
        "text/plain": "Hello, world!",
        "text/html": "<h1>Hello, world!</h1>",
        "application/json": {
            "name": "John Doe",
            "age": 30
        },
        "application/vnd.dataresource+json": {
            "data": [
                {"name": "Alice", "age": 25},
                {"name": "Bob", "age": 35}
            ],
            "schema": {
                "fields": [
                    {"name": "name", "type": "string"},
                    {"name": "age", "type": "integer"}
                ]
            }
        },
        "application/octet-stream": "Binary data"
    }"#;

    /// Parses `raw` as a [`Media`] bundle, panicking when it is not valid.
    fn parse(raw: &str) -> Media {
        serde_json::from_str(raw).unwrap()
    }

    /// Ranks tabular data above JSON above HTML; nothing else is supported.
    fn prefer_structured(media_type: &MediaType) -> usize {
        match media_type {
            MediaType::DataTable(_) => 3,
            MediaType::Json(_) => 2,
            MediaType::Html(_) => 1,
            _ => 0,
        }
    }

    /// Ranks HTML above plain text; nothing else is supported.
    fn prefer_html(media_type: &MediaType) -> usize {
        match media_type {
            MediaType::Html(_) => 2,
            MediaType::Plain(_) => 1,
            _ => 0,
        }
    }

    #[test]
    fn svg_deserialized_correctly() {
        let bundle = parse(
            r#"{
            "image/svg+xml": "<svg xmlns=\"http://www.w3.org/2000/svg\"><circle cx=\"50\" cy=\"50\" r=\"40\"/></svg>",
            "text/plain": "<IPython.core.display.SVG object>"
        }"#,
        );

        let svg = MediaType::Svg(
            r#"<svg xmlns="http://www.w3.org/2000/svg"><circle cx="50" cy="50" r="40"/></svg>"#
                .to_string(),
        );
        let plain = MediaType::Plain("<IPython.core.display.SVG object>".to_string());

        assert_eq!(bundle.content.len(), 2);
        assert!(bundle.content.contains(&svg));
        assert!(bundle.content.contains(&plain));
    }

    #[test]
    fn svg_array_deserialized_correctly() {
        let bundle = parse(
            r#"{
            "image/svg+xml": ["<svg>\n", "  <circle/>\n", "</svg>"],
            "text/plain": "svg"
        }"#,
        );

        // The array of lines is concatenated back into a single string.
        let svg = MediaType::Svg("<svg>\n  <circle/>\n</svg>".to_string());

        assert_eq!(bundle.content.len(), 2);
        assert!(bundle.content.contains(&svg));
    }

    #[test]
    fn richest_middle() {
        let bundle = parse(MIXED_BUNDLE);

        match bundle.richest(prefer_html) {
            Some(MediaType::Html(data)) => assert_eq!(data, "<h1>Hello, world!</h1>"),
            _ => panic!("Unexpected media type"),
        }
    }

    #[test]
    fn find_table() {
        let bundle = parse(MIXED_BUNDLE);

        let table = match bundle.richest(prefer_structured) {
            Some(MediaType::DataTable(table)) => table,
            _ => panic!("Unexpected mime type"),
        };

        let expected_rows = vec![
            json!({"name": "Alice", "age": 25}),
            json!({"name": "Bob", "age": 35}),
        ];
        assert_eq!(table.data, Some(expected_rows));

        let field = |name: &str, field_type: datatable::FieldType| TableSchemaField {
            name: name.to_string(),
            field_type,
            ..Default::default()
        };
        let expected_fields = vec![
            field("name", datatable::FieldType::String),
            field("age", datatable::FieldType::Integer),
        ];
        assert_eq!(table.schema.fields, expected_fields);
    }

    #[test]
    fn find_nothing_and_be_happy() {
        let bundle = parse(
            r#"{
            "application/fancy": "Too ✨ Fancy ✨ for you!"
        }"#,
        );

        // Nothing the ranker supports is present...
        assert_eq!(bundle.richest(prefer_structured), None);

        // ...but the unknown entry is still kept verbatim.
        let fancy = MediaType::Other((
            "application/fancy".to_string(),
            json!("Too ✨ Fancy ✨ for you!"),
        ));
        assert!(bundle.content.contains(&fancy));
    }

    #[test]
    fn no_media_type_supported() {
        let bundle = parse(MIXED_BUNDLE);

        assert_eq!(bundle.richest(|_| 0), None);
    }

    #[test]
    fn ensure_array_of_text_processed() {
        // A one-element array behaves like a plain string.
        let single_line = parse(
            r#"{
            "text/plain": ["Hello, world!"],
            "text/html": "<h1>Hello, world!</h1>"
        }"#,
        );

        assert_eq!(single_line.content.len(), 2);
        for expected in [
            MediaType::Plain("Hello, world!".to_string()),
            MediaType::Html("<h1>Hello, world!</h1>".to_string()),
        ] {
            assert!(single_line.content.contains(&expected));
        }

        // Several lines are joined without adding or dropping anything.
        let multi_line = parse(
            r#"{
            "text/plain": ["Hello, world!\n", "Welcome to zombo.com"],
            "text/html": ["<h1>\n", "  Hello, world!\n", "</h1>"]
        }"#,
        );

        assert_eq!(multi_line.content.len(), 2);
        for expected in [
            MediaType::Plain("Hello, world!\nWelcome to zombo.com".to_string()),
            MediaType::Html("<h1>\n  Hello, world!\n</h1>".to_string()),
        ] {
            assert!(multi_line.content.contains(&expected));
        }
    }
}