1use std::collections::BTreeMap;
27
28use carta_ast::{
29 ApiVersion, Attr, Block, Document, Format, Inline, MetaValue, Target, ToCompactString,
30};
31use carta_core::media::{base64_decode, content_addressed_name};
32use carta_core::{Error, MediaBag, Reader, ReaderOptions, Result};
33use serde_json::Value;
34
35use crate::commonmark::CommonmarkReader;
36
37#[derive(Debug, Default, Clone, Copy)]
39pub struct IpynbReader;
40
41impl Reader for IpynbReader {
42 fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document> {
43 self.read_media(input, options)
44 .map(|(document, _)| document)
45 }
46
47 fn read_media(&self, input: &str, options: &ReaderOptions) -> Result<(Document, MediaBag)> {
48 let notebook: Value = serde_json::from_str(input)?;
49 let nbformat = notebook
50 .get("nbformat")
51 .and_then(Value::as_i64)
52 .unwrap_or(4);
53 if nbformat < 4 {
54 return Err(Error::UnsupportedFormat(format!(
55 "notebook format version {nbformat} (only nbformat 4 and later are read)"
56 )));
57 }
58 let nbformat_minor = notebook
59 .get("nbformat_minor")
60 .and_then(Value::as_i64)
61 .unwrap_or(0);
62 let language = notebook_language(¬ebook);
63 let meta = build_meta(¬ebook, nbformat, nbformat_minor);
64
65 let mut media = MediaBag::new();
66 let mut blocks = Vec::new();
67 if let Some(Value::Array(cells)) = notebook.get("cells") {
68 for cell in cells {
69 if let Some(block) = cell_to_block(cell, &language, options, &mut media)? {
70 blocks.push(block);
71 }
72 }
73 }
74 let document = Document {
75 api_version: ApiVersion::default(),
76 meta: meta.into_iter().map(|(k, v)| (k.into(), v)).collect(),
77 blocks,
78 };
79 Ok((document, media))
80 }
81}
82
83fn notebook_language(notebook: &Value) -> String {
86 notebook
87 .get("metadata")
88 .and_then(|metadata| metadata.get("kernelspec"))
89 .and_then(|kernelspec| kernelspec.get("language"))
90 .and_then(Value::as_str)
91 .unwrap_or("python")
92 .to_owned()
93}
94
95fn build_meta(notebook: &Value, nbformat: i64, nbformat_minor: i64) -> BTreeMap<String, MetaValue> {
98 let mut jupyter: BTreeMap<String, MetaValue> = BTreeMap::new();
99 if let Some(Value::Object(metadata)) = notebook.get("metadata") {
100 for (key, value) in metadata {
101 jupyter.insert(key.clone(), meta_value(value));
102 }
103 }
104 jupyter.insert(
105 "nbformat".to_owned(),
106 MetaValue::MetaString(nbformat.to_compact_string()),
107 );
108 jupyter.insert(
109 "nbformat_minor".to_owned(),
110 MetaValue::MetaString(nbformat_minor.to_compact_string()),
111 );
112 let mut meta = BTreeMap::new();
113 meta.insert(
114 "jupyter".to_owned(),
115 MetaValue::MetaMap(jupyter.into_iter().map(|(k, v)| (k.into(), v)).collect()),
116 );
117 meta
118}
119
120fn meta_value(value: &Value) -> MetaValue {
126 match value {
127 Value::Null => MetaValue::MetaString(carta_ast::Text::default()),
128 Value::Bool(flag) => MetaValue::MetaBool(*flag),
129 Value::Number(number) => MetaValue::MetaString(meta_number(number).into()),
130 Value::String(text) => MetaValue::MetaString(text.clone().into()),
131 Value::Array(items) => MetaValue::MetaList(items.iter().map(meta_value).collect()),
132 Value::Object(map) => MetaValue::MetaMap(
133 map.iter()
134 .map(|(key, value)| (key.clone().into(), meta_value(value)))
135 .collect(),
136 ),
137 }
138}
139
140fn meta_number(number: &serde_json::Number) -> String {
144 if let Some(integer) = number.as_i64() {
145 return integer.to_string();
146 }
147 if let Some(integer) = number.as_u64() {
148 return integer.to_string();
149 }
150 match number.as_f64() {
151 Some(value) if value.is_finite() && value.fract() == 0.0 => integer_string(value),
152 Some(value) => general_decimal(value),
153 None => number.to_string(),
154 }
155}
156
157fn json_number(number: &serde_json::Number) -> String {
161 if let Some(integer) = number.as_i64() {
162 return integer.to_string();
163 }
164 if let Some(integer) = number.as_u64() {
165 return integer.to_string();
166 }
167 match number.as_f64() {
168 Some(value) => general_decimal(value),
169 None => number.to_string(),
170 }
171}
172
/// Formats a whole-valued float without a fractional part.
///
/// Both zeros render as `"0"`, so negative zero never shows a sign.
fn integer_string(value: f64) -> String {
    if value == 0.0 {
        String::from("0")
    } else {
        value.to_string()
    }
}
181
182fn general_decimal(value: f64) -> String {
186 if value == 0.0 {
187 return "0.0".to_owned();
188 }
189 let (digits, exponent) = shortest_digits(value.abs());
190 let body = if (-1..=6).contains(&exponent) {
191 fixed_notation(&digits, exponent)
192 } else {
193 scientific_notation(&digits, exponent)
194 };
195 if value.is_sign_negative() {
196 format!("-{body}")
197 } else {
198 body
199 }
200}
201
/// Splits a non-negative float into its significant decimal digits and its
/// decimal exponent, as produced by Rust's `{:e}` formatting.
///
/// For example `1.5` yields `("15", 0)` and `0.001` yields `("1", -3)`.
fn shortest_digits(magnitude: f64) -> (String, i32) {
    let rendered = format!("{magnitude:e}");
    let (mantissa, exponent) = rendered
        .split_once('e')
        .map_or((rendered.as_str(), 0), |(head, tail)| {
            (head, tail.parse::<i32>().unwrap_or(0))
        });
    // Keep only the digits: the decimal point (and any sign) is dropped.
    let mut digits = String::with_capacity(mantissa.len());
    digits.extend(mantissa.chars().filter(char::is_ascii_digit));
    (digits, exponent)
}
214
/// Lays out `digits` in fixed notation for the given decimal exponent.
///
/// A negative exponent yields `0.` followed by zero padding and the digits.
/// Otherwise the first `exponent + 1` digits form the integer part, padded
/// with zeros and a trailing `.0` when there are not enough digits.
fn fixed_notation(digits: &str, exponent: i32) -> String {
    if exponent < 0 {
        let padding = usize::try_from((-exponent - 1).max(0)).unwrap_or(0);
        let mut out = String::with_capacity(2 + padding + digits.len());
        out.push_str("0.");
        out.push_str(&"0".repeat(padding));
        out.push_str(digits);
        return out;
    }
    let whole_len = usize::try_from(exponent).unwrap_or(0) + 1;
    match whole_len.checked_sub(digits.len()) {
        // Not enough digits to reach the decimal point: pad with zeros.
        Some(missing) => format!("{digits}{}.0", "0".repeat(missing)),
        None => {
            let (whole, fraction) = digits.split_at(whole_len);
            format!("{whole}.{fraction}")
        }
    }
}
232
/// Lays out `digits` in scientific notation: one leading digit, a decimal
/// point, the remaining digits (or `0` when there are none), then `e` and
/// the exponent.
fn scientific_notation(digits: &str, exponent: i32) -> String {
    // Split after the first digit, or at 0 when `digits` is empty.
    let (lead, tail) = digits.split_at(usize::from(!digits.is_empty()));
    let tail = if tail.is_empty() { "0" } else { tail };
    format!("{lead}.{tail}e{exponent}")
}
244
245fn json_render(value: &Value) -> String {
248 let mut out = String::new();
249 json_write(value, &mut out);
250 out
251}
252
253fn json_write(value: &Value, out: &mut String) {
254 match value {
255 Value::Number(number) => out.push_str(&json_number(number)),
256 Value::Array(items) => {
257 out.push('[');
258 for (index, item) in items.iter().enumerate() {
259 if index != 0 {
260 out.push(',');
261 }
262 json_write(item, out);
263 }
264 out.push(']');
265 }
266 Value::Object(map) => {
267 out.push('{');
268 for (index, (key, item)) in map.iter().enumerate() {
269 if index != 0 {
270 out.push(',');
271 }
272 out.push_str(&Value::String(key.clone()).to_string());
273 out.push(':');
274 json_write(item, out);
275 }
276 out.push('}');
277 }
278 other => out.push_str(&other.to_string()),
279 }
280}
281
282fn cell_to_block(
286 cell: &Value,
287 language: &str,
288 options: &ReaderOptions,
289 media: &mut MediaBag,
290) -> Result<Option<Block>> {
291 let Some(kind) = cell.get("cell_type").and_then(Value::as_str) else {
292 return Ok(None);
293 };
294 let attr = cell_attr(cell, kind);
295 let block = match kind {
296 "markdown" => Block::Div(Box::new(attr), markdown_cell_blocks(cell, options, media)?),
297 "code" => Block::Div(Box::new(attr), code_cell_blocks(cell, language, media)),
298 "raw" => Block::Div(Box::new(attr), vec![raw_cell_block(cell)]),
299 _ => return Ok(None),
300 };
301 Ok(Some(block))
302}
303
304fn cell_attr(cell: &Value, kind: &str) -> Attr {
307 let id = cell
308 .get("id")
309 .and_then(Value::as_str)
310 .unwrap_or_default()
311 .to_owned();
312 let classes = vec!["cell".to_owned(), kind.to_owned()];
313 let mut attributes = Vec::new();
314 if kind == "code"
315 && let Some(count) = cell.get("execution_count").and_then(Value::as_i64)
316 {
317 attributes.push(("execution_count".to_owned(), count.to_string()));
318 }
319 if let Some(Value::Object(metadata)) = cell.get("metadata") {
320 for (key, value) in metadata {
321 attributes.push((key.clone(), attribute_value(value)));
322 }
323 }
324 Attr {
325 id: id.into(),
326 classes: classes.into_iter().map(Into::into).collect(),
327 attributes: attributes
328 .into_iter()
329 .map(|(k, v)| (k.into(), v.into()))
330 .collect(),
331 }
332}
333
334fn attribute_value(value: &Value) -> String {
340 match value {
341 Value::String(text)
342 if text.is_empty() || is_integer_literal(text) || text == "true" || text == "false" =>
343 {
344 format!("\"{text}\"")
345 }
346 Value::String(text) => text.clone(),
347 other => json_render(other),
348 }
349}
350
/// Reports whether `text` is a non-empty run of ASCII digits.
fn is_integer_literal(text: &str) -> bool {
    match text.as_bytes() {
        [] => false,
        bytes => bytes.iter().all(u8::is_ascii_digit),
    }
}
355
356fn markdown_cell_blocks(
360 cell: &Value,
361 options: &ReaderOptions,
362 media: &mut MediaBag,
363) -> Result<Vec<Block>> {
364 let source = multiline_text(cell.get("source"));
365 let mut markdown_options = ReaderOptions::default();
366 markdown_options.extensions = options.extensions;
367 markdown_options.greedy_paragraphs = true;
373 let mut blocks = CommonmarkReader.read(&source, &markdown_options)?.blocks;
374 let prefix = cell
375 .get("id")
376 .map(|id| format!("{}-", id.as_str().unwrap_or_default()));
377 capture_attachments(cell, prefix.as_deref(), media);
378 let prefix = prefix.as_deref().unwrap_or_default();
379 carta_core::walk::for_each_image_target(&mut blocks, &mut |target| {
380 if let Some(bare) = target.url.strip_prefix("attachment:") {
381 target.url = format!("{prefix}{bare}").into();
382 }
383 });
384 Ok(blocks)
385}
386
387fn capture_attachments(cell: &Value, prefix: Option<&str>, media: &mut MediaBag) {
393 let Some(Value::Object(attachments)) = cell.get("attachments") else {
394 return;
395 };
396 for (reference, bundle) in attachments {
397 let Value::Object(by_mime) = bundle else {
398 continue;
399 };
400 let chosen = by_mime
401 .iter()
402 .find(|(mime, _)| is_image_like(mime))
403 .or_else(|| by_mime.iter().next());
404 let Some((mime, payload)) = chosen else {
405 continue;
406 };
407 let name = match prefix {
408 Some(prefix) => format!("{prefix}{reference}"),
409 None => reference.clone(),
410 };
411 media.insert(name, Some(mime.clone()), decode_payload(mime, payload));
412 }
413}
414
415fn code_cell_blocks(cell: &Value, language: &str, media: &mut MediaBag) -> Vec<Block> {
418 let source = multiline_text(cell.get("source"));
419 let source_attr = Attr {
420 id: carta_ast::Text::default(),
421 classes: vec![language.into()],
422 attributes: Vec::new(),
423 };
424 let mut blocks = vec![Block::CodeBlock(Box::new(source_attr), source.into())];
425 if let Some(Value::Array(outputs)) = cell.get("outputs") {
426 for output in outputs {
427 if let Some(block) = output_to_block(output, media) {
428 blocks.push(block);
429 }
430 }
431 }
432 blocks
433}
434
435fn raw_cell_block(cell: &Value) -> Block {
439 let source = multiline_text(cell.get("source"));
440 let metadata = cell.get("metadata");
441 let mime = metadata
442 .and_then(|metadata| metadata.get("raw_mimetype"))
443 .or_else(|| metadata.and_then(|metadata| metadata.get("format")))
444 .and_then(Value::as_str);
445 let format = mime.map_or_else(|| "ipynb".to_owned(), format_from_mime);
446 Block::RawBlock(Format(format.into()), source.into())
447}
448
449fn output_to_block(output: &Value, media: &mut MediaBag) -> Option<Block> {
452 match output.get("output_type").and_then(Value::as_str)? {
453 "stream" => Some(stream_output(output)),
454 "execute_result" => Some(result_output(output, true, media)),
455 "display_data" => Some(result_output(output, false, media)),
456 "error" => Some(error_output(output)),
457 _ => None,
458 }
459}
460
461fn stream_output(output: &Value) -> Block {
464 let name = output
465 .get("name")
466 .and_then(Value::as_str)
467 .unwrap_or("stdout");
468 let text = strip_ansi(&multiline_text(output.get("text")));
469 let attr = Attr {
470 id: carta_ast::Text::default(),
471 classes: vec!["output".into(), "stream".into(), name.into()],
472 attributes: Vec::new(),
473 };
474 Block::Div(
475 Box::new(attr),
476 vec![Block::CodeBlock(Box::default(), text.into())],
477 )
478}
479
480fn result_output(output: &Value, is_result: bool, media: &mut MediaBag) -> Block {
483 let kind = if is_result {
484 "execute_result"
485 } else {
486 "display_data"
487 };
488 let mut attributes = Vec::new();
489 if is_result && let Some(count) = output.get("execution_count").and_then(Value::as_i64) {
490 attributes.push(("execution_count".to_owned(), count.to_string()));
491 }
492 let attr = Attr {
493 id: carta_ast::Text::default(),
494 classes: vec!["output".into(), kind.into()],
495 attributes: attributes
496 .into_iter()
497 .map(|(k, v)| (k.into(), v.into()))
498 .collect(),
499 };
500 Block::Div(
501 Box::new(attr),
502 data_to_blocks(output.get("data"), output.get("metadata"), media),
503 )
504}
505
506fn error_output(output: &Value) -> Block {
509 let ename = output
510 .get("ename")
511 .and_then(Value::as_str)
512 .unwrap_or_default()
513 .to_owned();
514 let evalue = output
515 .get("evalue")
516 .and_then(Value::as_str)
517 .unwrap_or_default()
518 .to_owned();
519 let traceback = match output.get("traceback") {
520 Some(Value::Array(lines)) => {
521 let joined = lines
522 .iter()
523 .filter_map(Value::as_str)
524 .collect::<Vec<_>>()
525 .join("\n");
526 format!("{joined}\n")
527 }
528 Some(Value::String(text)) => text.clone(),
529 _ => String::new(),
530 };
531 let attr = Attr {
532 id: carta_ast::Text::default(),
533 classes: vec!["output".into(), "error".into()],
534 attributes: vec![
535 ("ename".into(), ename.into()),
536 ("evalue".into(), evalue.into()),
537 ],
538 };
539 Block::Div(
540 Box::new(attr),
541 vec![Block::CodeBlock(
542 Box::default(),
543 strip_ansi(&traceback).into(),
544 )],
545 )
546}
547
548fn data_to_blocks(
554 data: Option<&Value>,
555 metadata: Option<&Value>,
556 media: &mut MediaBag,
557) -> Vec<Block> {
558 let Some(Value::Object(data)) = data else {
559 return Vec::new();
560 };
561 if let Some((mime, value)) = data.iter().find(|(mime, _)| is_image_like(mime)) {
562 return vec![image_block(mime, value, metadata, media)];
563 }
564 if let Some((mime, value)) = data.iter().find(|(mime, _)| is_json_like(mime)) {
565 return vec![non_image_block(mime, value)];
566 }
567 for mime in ["text/plain", "text/html", "text/latex", "text/markdown"] {
568 if let Some(value) = data.get(mime) {
569 return vec![non_image_block(mime, value)];
570 }
571 }
572 Vec::new()
573}
574
575fn non_image_block(mime: &str, value: &Value) -> Block {
579 if is_json_like(mime) {
580 return Block::CodeBlock(
581 Box::new(Attr {
582 id: carta_ast::Text::default(),
583 classes: vec!["json".into()],
584 attributes: Vec::new(),
585 }),
586 json_render(value).into(),
587 );
588 }
589 match mime {
590 "text/html" => Block::RawBlock(Format("html".into()), multiline_text(Some(value)).into()),
591 "text/latex" => Block::RawBlock(Format("latex".into()), multiline_text(Some(value)).into()),
592 "text/markdown" => Block::RawBlock(
593 Format("markdown".into()),
594 multiline_text(Some(value)).into(),
595 ),
596 _ => Block::CodeBlock(
598 Box::default(),
599 strip_ansi(&multiline_text(Some(value))).into(),
600 ),
601 }
602}
603
604fn image_block(mime: &str, value: &Value, metadata: Option<&Value>, media: &mut MediaBag) -> Block {
608 let bytes = decode_payload(mime, value);
609 let name = content_addressed_name(mime, &bytes);
610 media.insert(name.clone(), Some(mime.to_owned()), bytes);
611 Block::Para(vec![Inline::Image(
612 Box::new(image_attr(mime, metadata)),
613 Vec::new(),
614 Box::new(Target {
615 url: name.into(),
616 title: carta_ast::Text::default(),
617 }),
618 )])
619}
620
621fn decode_payload(mime: &str, value: &Value) -> Vec<u8> {
625 let payload = multiline_text(Some(value));
626 if mime == "image/svg+xml" {
627 payload.into_bytes()
628 } else {
629 base64_decode(&payload).unwrap_or_else(|| payload.into_bytes())
630 }
631}
632
633fn image_attr(mime: &str, metadata: Option<&Value>) -> Attr {
636 let mut attributes = Vec::new();
637 if let Some(Value::Object(by_mime)) = metadata
638 && let Some(Value::Object(entry)) = by_mime.get(mime)
639 {
640 for (key, value) in entry {
641 attributes.push((key.clone(), attribute_value(value)));
642 }
643 }
644 Attr {
645 id: carta_ast::Text::default(),
646 classes: Vec::new(),
647 attributes: attributes
648 .into_iter()
649 .map(|(k, v)| (k.into(), v.into()))
650 .collect(),
651 }
652}
653
/// Reports whether a MIME type is handled as an image: any `image/*` type,
/// or `application/pdf`.
fn is_image_like(mime: &str) -> bool {
    matches!(mime, "application/pdf") || mime.strip_prefix("image/").is_some()
}
659
/// Reports whether a MIME type carries JSON: `application/json` itself or
/// any type ending in `+json`.
fn is_json_like(mime: &str) -> bool {
    mime.strip_suffix("+json").is_some() || matches!(mime, "application/json")
}
665
/// Maps a raw cell's MIME type to a format name; MIME types not in the
/// table are returned unchanged.
fn format_from_mime(mime: &str) -> String {
    const FORMAT_NAMES: [(&str, &str); 7] = [
        ("text/html", "html"),
        ("text/latex", "latex"),
        ("application/pdf", "latex"),
        ("text/markdown", "markdown"),
        ("text/restructuredtext", "rst"),
        ("text/x-rst", "rst"),
        ("text/asciidoc", "asciidoc"),
    ];
    FORMAT_NAMES
        .iter()
        .find(|(known, _)| *known == mime)
        .map_or(mime, |&(_, name)| name)
        .to_owned()
}
679
680fn multiline_text(value: Option<&Value>) -> String {
683 match value {
684 Some(Value::String(text)) => text.clone(),
685 Some(Value::Array(lines)) => lines.iter().filter_map(Value::as_str).collect(),
686 _ => String::new(),
687 }
688}
689
/// Removes ANSI escape sequences from `text`.
///
/// An escape character followed by `[` starts a sequence that runs through
/// the next character in `@`..=`~`, or to the end of the text when none
/// follows. Any other escape character is dropped on its own.
fn strip_ansi(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());
    let mut rest = text.chars().peekable();
    while let Some(current) = rest.next() {
        match current {
            '\u{1b}' => {
                if rest.next_if_eq(&'[').is_some() {
                    // `find` consumes everything up to and including the final byte.
                    let _ = rest.find(|c| ('\u{40}'..='\u{7e}').contains(c));
                }
            }
            other => cleaned.push(other),
        }
    }
    cleaned
}
711
#[cfg(test)]
mod tests {
    use super::*;
    use carta_core::MediaBag;

    /// Reads a notebook with default options, panicking on failure.
    fn read(input: &str) -> Document {
        IpynbReader
            .read(input, &ReaderOptions::default())
            .expect("notebook input parses")
    }

    /// Reads a notebook with the given markdown extension set.
    fn read_with(input: &str, extensions: carta_core::Extensions) -> Document {
        let mut options = ReaderOptions::default();
        options.extensions = extensions;
        IpynbReader.read(input, &options).expect("notebook parses")
    }

    /// Reads a notebook with default options, also returning the media bag.
    fn read_media(input: &str) -> (Document, MediaBag) {
        IpynbReader
            .read_media(input, &ReaderOptions::default())
            .expect("notebook input parses")
    }

    /// Returns the `jupyter` metadata map of a document.
    fn jupyter(document: &Document) -> &BTreeMap<carta_ast::Text, MetaValue> {
        match document.meta.get("jupyter") {
            Some(MetaValue::MetaMap(map)) => map,
            _ => panic!("expected a jupyter metadata map"),
        }
    }

    /// Shorthand for a string metadata value.
    fn meta_text(text: &str) -> MetaValue {
        MetaValue::MetaString(text.to_owned().into())
    }

    /// Returns the attributes and children of the first cell div.
    fn first_cell(document: &Document) -> (&Attr, &[Block]) {
        let Some(Block::Div(attr, blocks)) = document.blocks.first() else {
            panic!("expected a cell div");
        };
        (&**attr, blocks.as_slice())
    }

    /// Returns the first output of the first cell: the block right after the
    /// cell's source code block.
    fn first_output(document: &Document) -> Option<&Block> {
        let Some(Block::Div(_, blocks)) = document.blocks.first() else {
            return None;
        };
        blocks.get(1)
    }

    /// Returns the children of the first output div of the first cell.
    fn first_output_body(document: &Document) -> &[Block] {
        let Some(Block::Div(_, body)) = first_output(document) else {
            panic!("expected an output div");
        };
        body
    }

    /// Returns the attributes and target of the image that opens the first
    /// paragraph of `blocks`.
    fn leading_image(blocks: &[Block]) -> (&Attr, &Target) {
        let Some(Block::Para(inlines)) = blocks.first() else {
            panic!("expected a paragraph");
        };
        let Some(Inline::Image(attr, _, target)) = inlines.first() else {
            panic!("expected an image");
        };
        (&**attr, &**target)
    }

    #[test]
    fn empty_notebook_exposes_only_version_metadata() {
        let document = read(r#"{"cells": [], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#);
        assert!(document.blocks.is_empty());
        let map = jupyter(&document);
        assert_eq!(map.get("nbformat"), Some(&meta_text("4")));
        assert_eq!(map.get("nbformat_minor"), Some(&meta_text("5")));
    }

    #[test]
    fn missing_minor_version_defaults_to_zero() {
        let document = read(r#"{"cells": [], "metadata": {}, "nbformat": 4}"#);
        assert_eq!(
            jupyter(&document).get("nbformat_minor"),
            Some(&meta_text("0"))
        );
    }

    #[test]
    fn metadata_scalars_normalize_and_recurse() {
        let document = read(
            r#"{"cells": [], "metadata": {"afloat": 3.0, "aint": 7, "abool": true,
                "anull": null, "alist": [1, "two", 3.0], "amap": {"z": 1, "a": 2.0}},
                "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let map = jupyter(&document);
        assert_eq!(map.get("afloat"), Some(&meta_text("3")));
        assert_eq!(map.get("aint"), Some(&meta_text("7")));
        assert_eq!(map.get("abool"), Some(&MetaValue::MetaBool(true)));
        assert_eq!(
            map.get("anull"),
            Some(&MetaValue::MetaString(carta_ast::Text::default()))
        );
        assert_eq!(
            map.get("alist"),
            Some(&MetaValue::MetaList(vec![
                meta_text("1"),
                meta_text("two"),
                meta_text("3"),
            ]))
        );
        let Some(MetaValue::MetaMap(nested)) = map.get("amap") else {
            panic!("expected a nested map");
        };
        assert_eq!(nested.get("a"), Some(&meta_text("2")));
        assert_eq!(nested.get("z"), Some(&meta_text("1")));
    }

    #[test]
    fn markdown_cell_becomes_a_div_with_parsed_blocks() {
        let document = read(
            r##"{"cells": [{"cell_type": "markdown", "id": "m1", "metadata": {},
                "source": ["# Title\n", "\n", "text"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"##,
        );
        let (attr, blocks) = first_cell(&document);
        assert_eq!(attr.id, "m1");
        assert_eq!(attr.classes, vec!["cell".to_owned(), "markdown".to_owned()]);
        assert!(matches!(blocks.first(), Some(Block::Header(1, _, _))));
        assert!(matches!(blocks.get(1), Some(Block::Para(_))));
    }

    #[test]
    fn markdown_cell_honors_forwarded_extensions() {
        let input = r#"{"cells": [{"cell_type": "markdown", "metadata": {},
            "source": ["| a | b |\n|---|---|\n| 1 | 2 |\n"]}],
            "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#;
        let with_tables = read_with(input, carta_core::presets::GFM);
        let (_, blocks) = first_cell(&with_tables);
        assert!(matches!(blocks.first(), Some(Block::Table(_))));

        let strict = read_with(input, carta_core::Extensions::empty());
        let (_, blocks) = first_cell(&strict);
        assert!(!matches!(blocks.first(), Some(Block::Table(_))));
    }

    #[test]
    fn markdown_attachment_prefix_is_stripped_from_images() {
        // The source must actually reference the attachment, otherwise the
        // cell contains no image to inspect.
        let document = read(
            r#"{"cells": [{"cell_type": "markdown", "metadata": {},
                "attachments": {"a.png": {"image/png": "x"}},
                "source": ["![img](attachment:a.png)"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, blocks) = first_cell(&document);
        let (_, target) = leading_image(blocks);
        assert_eq!(target.url, "a.png");
    }

    #[test]
    fn markdown_attachment_reference_is_scoped_to_the_cell_id() {
        let document = read(
            r#"{"cells": [{"cell_type": "markdown", "id": "cell9", "metadata": {},
                "attachments": {"a.png": {"image/png": "x"}},
                "source": ["![img](attachment:a.png)"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, blocks) = first_cell(&document);
        let (_, target) = leading_image(blocks);
        assert_eq!(target.url, "cell9-a.png");
    }

    #[test]
    fn code_cell_emits_source_then_outputs() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {"scrolled": true},
                "execution_count": 5, "source": ["import os\n", "print(1)"],
                "outputs": [
                  {"output_type": "stream", "name": "stdout", "text": ["hello\n"]},
                  {"output_type": "execute_result", "execution_count": 5,
                   "data": {"text/plain": ["42"]}, "metadata": {}},
                  {"output_type": "error", "ename": "E", "evalue": "v",
                   "traceback": ["line1", "line2"]}
                ]}],
                "metadata": {"kernelspec": {"language": "python"}},
                "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (attr, blocks) = first_cell(&document);
        assert_eq!(
            attr.attributes,
            vec![
                ("execution_count".into(), "5".into()),
                ("scrolled".into(), "true".into()),
            ]
        );

        // The source comes first, classed with the kernel language.
        let Some(Block::CodeBlock(source_attr, source)) = blocks.first() else {
            panic!("expected a source code block");
        };
        assert_eq!(source_attr.classes, vec!["python".to_owned()]);
        assert_eq!(source, "import os\nprint(1)");

        // Then the outputs, in notebook order.
        let Some(Block::Div(stream_attr, stream_body)) = blocks.get(1) else {
            panic!("expected a stream div");
        };
        assert_eq!(
            stream_attr.classes,
            vec![
                "output".to_owned(),
                "stream".to_owned(),
                "stdout".to_owned()
            ]
        );
        assert!(matches!(
            stream_body.first(),
            Some(Block::CodeBlock(_, text)) if text == "hello\n"
        ));

        let Some(Block::Div(result_attr, result_body)) = blocks.get(2) else {
            panic!("expected a result div");
        };
        assert_eq!(
            result_attr.classes,
            vec!["output".to_owned(), "execute_result".to_owned()]
        );
        assert_eq!(
            result_attr.attributes,
            vec![("execution_count".into(), "5".into())]
        );
        assert!(matches!(
            result_body.first(),
            Some(Block::CodeBlock(_, text)) if text == "42"
        ));

        let Some(Block::Div(error_attr, error_body)) = blocks.get(3) else {
            panic!("expected an error div");
        };
        assert_eq!(
            error_attr.attributes,
            vec![("ename".into(), "E".into()), ("evalue".into(), "v".into()),]
        );
        assert!(matches!(
            error_body.first(),
            Some(Block::CodeBlock(_, text)) if text == "line1\nline2\n"
        ));
    }

    #[test]
    fn null_execution_count_yields_no_attribute() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": null,
                "source": [], "outputs": []}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (attr, _) = first_cell(&document);
        assert!(attr.attributes.is_empty());
    }

    #[test]
    fn image_output_is_content_addressed() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, target) = leading_image(first_output_body(&document));
        assert_eq!(target.url, "22f545ac6b50163ce39bac49094c3f64e0858403.png");

        let svg = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/svg+xml": ["<svg/>"]},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, target) = leading_image(first_output_body(&svg));
        assert_eq!(target.url, "1c3ba3b813e1080e9721846f23a21c09e5c3fd27.svg");
    }

    #[test]
    fn image_output_bytes_are_lifted_into_the_media_bag() {
        let (document, media) = read_media(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, target) = leading_image(first_output_body(&document));
        let name = "22f545ac6b50163ce39bac49094c3f64e0858403.png";
        assert_eq!(target.url, name);
        assert_eq!(media.len(), 1);
        let item = media.get(name).expect("image is in the bag");
        assert_eq!(item.mime.as_deref(), Some("image/png"));
        assert_eq!(
            item.bytes,
            carta_core::media::base64_decode("iVBORw0KGgoAAAANSUhEUg==").unwrap()
        );
    }

    #[test]
    fn svg_output_is_stored_as_its_source_bytes() {
        let (_, media) = read_media(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/svg+xml": ["<svg/>"]},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let name = "1c3ba3b813e1080e9721846f23a21c09e5c3fd27.svg";
        let item = media.get(name).expect("svg is in the bag");
        assert_eq!(item.mime.as_deref(), Some("image/svg+xml"));
        assert_eq!(item.bytes, b"<svg/>");
    }

    #[test]
    fn identical_image_outputs_share_one_bag_entry() {
        let (_, media) = read_media(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="},
                   "metadata": {}},
                  {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        assert_eq!(media.len(), 1);
    }

    #[test]
    fn markdown_attachment_bytes_are_lifted_into_the_media_bag() {
        let (_, media) = read_media(
            r#"{"cells": [{"cell_type": "markdown", "id": "cell9", "metadata": {},
                "attachments": {"a.png": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="}},
                "source": ["![img](attachment:a.png)"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let item = media.get("cell9-a.png").expect("attachment is in the bag");
        assert_eq!(item.mime.as_deref(), Some("image/png"));
        assert_eq!(
            item.bytes,
            carta_core::media::base64_decode("iVBORw0KGgoAAAANSUhEUg==").unwrap()
        );
    }

    #[test]
    fn attachment_without_a_cell_id_uses_the_bare_reference() {
        let (_, media) = read_media(
            r#"{"cells": [{"cell_type": "markdown", "metadata": {},
                "attachments": {"a.png": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="}},
                "source": ["![img](attachment:a.png)"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        assert!(media.contains("a.png"));
    }

    #[test]
    fn image_wins_over_text_and_smaller_mime_wins_among_images() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data",
                   "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg==", "image/jpeg": "iVBORw0KGgoAAAANSUhEUg==",
                            "text/plain": ["p"]},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (_, target) = leading_image(first_output_body(&document));
        assert_eq!(target.url, "22f545ac6b50163ce39bac49094c3f64e0858403.jpg");
    }

    #[test]
    fn image_output_metadata_becomes_sorted_attributes() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUg=="},
                   "metadata": {"image/png": {"width": 100, "height": 50, "needs_background": "light"}}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (attr, _) = leading_image(first_output_body(&document));
        assert_eq!(
            attr.attributes,
            vec![
                ("height".into(), "50".into()),
                ("needs_background".into(), "light".into()),
                ("width".into(), "100".into()),
            ]
        );
    }

    #[test]
    fn structured_json_output_is_compact_and_sorted() {
        let document = read(
            r#"{"cells": [{"cell_type": "code", "metadata": {}, "execution_count": 1,
                "source": [], "outputs": [
                  {"output_type": "display_data", "data": {"application/json": {"z": 1, "a": 2.0}},
                   "metadata": {}}]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let Some(Block::CodeBlock(attr, text)) = first_output_body(&document).first() else {
            panic!("expected a code block");
        };
        assert_eq!(attr.classes, vec!["json".to_owned()]);
        assert_eq!(text, r#"{"a":2.0,"z":1}"#);
    }

    #[test]
    fn raw_cell_maps_format_to_writer_name() {
        let document = read(
            r#"{"cells": [
                {"cell_type": "raw", "metadata": {"format": "text/html"}, "source": ["<b>x</b>"]},
                {"cell_type": "raw", "metadata": {}, "source": ["plain"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        let (attr, body) = first_cell(&document);
        assert_eq!(attr.attributes, vec![("format".into(), "text/html".into())]);
        assert!(matches!(
            body.first(),
            Some(Block::RawBlock(Format(name), text)) if name == "html" && text == "<b>x</b>"
        ));

        // A raw cell without a declared format falls back to `ipynb`.
        let Some(Block::Div(_, body)) = document.blocks.get(1) else {
            panic!("expected a raw cell div");
        };
        assert!(matches!(
            body.first(),
            Some(Block::RawBlock(Format(name), _)) if name == "ipynb"
        ));
    }

    #[test]
    fn unknown_cell_kinds_are_dropped() {
        let document = read(
            r#"{"cells": [{"cell_type": "heading", "level": 2, "metadata": {}, "source": ["H"]}],
                "metadata": {}, "nbformat": 4, "nbformat_minor": 5}"#,
        );
        assert!(document.blocks.is_empty());
    }

    #[test]
    fn terminal_control_sequences_are_removed_from_text_outputs() {
        // Build the JSON escape `\u001b` at runtime so it can be spliced into
        // the raw-string template below.
        let esc = format!("{}u001b", '\\');
        let input = format!(
            r#"{{"cells": [{{"cell_type": "code", "metadata": {{}}, "execution_count": 1,
                "source": [], "outputs": [
                  {{"output_type": "stream", "name": "stdout",
                   "text": ["{esc}[31mred{esc}[0m"]}}]}}],
                "metadata": {{}}, "nbformat": 4, "nbformat_minor": 5}}"#
        );
        let document = read(&input);
        assert!(matches!(
            first_output_body(&document).first(),
            Some(Block::CodeBlock(_, text)) if text == "red"
        ));
    }

    #[test]
    fn malformed_input_is_an_error_not_a_panic() {
        assert!(
            IpynbReader
                .read("not json", &ReaderOptions::default())
                .is_err()
        );
        assert!(IpynbReader.read("", &ReaderOptions::default()).is_err());
    }

    #[test]
    fn pre_v4_notebook_is_an_error_not_a_panic() {
        let result = IpynbReader.read(
            r#"{"nbformat": 3, "nbformat_minor": 0, "worksheets": []}"#,
            &ReaderOptions::default(),
        );
        assert!(matches!(result, Err(Error::UnsupportedFormat(_))));
    }
}