1pub mod legacy;
2pub mod v3;
3pub mod v4;
4
5use serde::Serialize as _;
6use thiserror::Error;
7
/// Errors reported by the notebook parsing and serialization functions in this module.
#[derive(Error, Debug)]
pub enum NotebookError {
    /// The notebook's version pair is not handled by the requested operation.
    ///
    /// The fields are `(nbformat, nbformat_minor)`, in that order.
    #[error("Unsupported notebook version: {0}.{1}")]
    UnsupportedVersion(i32, i32),
    /// A `serde_json` error; `?` converts into this variant automatically via `#[from]`.
    #[error("JSON parsing error: {0}")]
    JsonError(#[from] serde_json::Error),
    /// A free-form validation failure described by the contained message.
    #[error("Validation error: {0}")]
    ValidationError(String),
}
17
/// A deviation found in the raw JSON of a v4.5 notebook.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Quirk {
    /// The cell at position `cell_index` of the top-level `cells` array has
    /// no `id` that is a non-empty string.
    MissingCellId { cell_index: usize },
}
29
/// A deserialized v4.5 notebook paired with the quirks detected in its raw JSON.
///
/// Both fields are private; use the accessor methods on this type to read
/// them or to take ownership of the notebook.
#[derive(Debug, Clone)]
pub struct V4Quirks {
    // The notebook as produced by deserializing the input JSON.
    notebook: v4::Notebook,
    // Quirks found in the raw JSON; `parse_notebook` only builds this type
    // when the list is non-empty.
    quirks: Vec<Quirk>,
}
42
43impl V4Quirks {
44 pub fn quirks(&self) -> &[Quirk] {
46 &self.quirks
47 }
48
49 pub fn notebook(&self) -> &v4::Notebook {
52 &self.notebook
53 }
54
55 pub fn repair(self) -> v4::Notebook {
63 self.notebook
64 }
65}
66
/// A parsed notebook, tagged by the schema it was deserialized with.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching.
#[derive(Debug)]
#[non_exhaustive]
pub enum Notebook {
    /// A v4.5 notebook for which no quirks were detected.
    V4(v4::Notebook),
    /// A v4.5 notebook for which at least one [`Quirk`] was detected.
    V4QuirksMode(V4Quirks),
    /// A notebook declaring version 4.0 through 4.4.
    Legacy(legacy::Notebook),
    /// A notebook declaring major version 3.
    V3(v3::Notebook),
}
75
76fn detect_v45_quirks(value: &serde_json::Value) -> Vec<Quirk> {
82 let mut quirks = Vec::new();
83
84 let Some(cells) = value.get("cells").and_then(|v| v.as_array()) else {
85 return quirks;
86 };
87
88 for (cell_index, cell) in cells.iter().enumerate() {
89 let has_non_empty_id = cell
90 .get("id")
91 .and_then(|v| v.as_str())
92 .map(|s| !s.is_empty())
93 .unwrap_or(false);
94
95 if !has_non_empty_id {
96 quirks.push(Quirk::MissingCellId { cell_index });
97 }
98 }
99
100 quirks
101}
102
103pub fn parse_notebook(json: &str) -> Result<Notebook, NotebookError> {
104 let value: serde_json::Value = serde_json::from_str(json)?;
105 let nbformat = value["nbformat"].as_i64().unwrap_or(0) as i32;
106 let nbformat_minor = value["nbformat_minor"].as_i64().unwrap_or(0) as i32;
107
108 match (nbformat, nbformat_minor) {
109 (4, 5) => {
110 let quirks = detect_v45_quirks(&value);
111 let notebook = serde_json::from_value::<v4::Notebook>(value)?;
112 if quirks.is_empty() {
113 Ok(Notebook::V4(notebook))
114 } else {
115 Ok(Notebook::V4QuirksMode(V4Quirks { notebook, quirks }))
116 }
117 }
118 (4, 0) | (4, 1) | (4, 2) | (4, 3) | (4, 4) => Ok(Notebook::Legacy(
119 serde_json::from_value::<legacy::Notebook>(value)?,
120 )),
121 (3, _) => Ok(Notebook::V3(serde_json::from_value::<v3::Notebook>(value)?)),
122 _ => Err(NotebookError::UnsupportedVersion(nbformat, nbformat_minor)),
123 }
124}
125
126fn sort_value_keys(value: serde_json::Value) -> serde_json::Value {
135 match value {
136 serde_json::Value::Object(map) => {
137 let mut entries: Vec<(String, serde_json::Value)> = map.into_iter().collect();
138 entries.sort_by(|a, b| a.0.cmp(&b.0));
139 let mut sorted = serde_json::Map::new();
140 for (k, v) in entries {
141 sorted.insert(k, sort_value_keys(v));
142 }
143 serde_json::Value::Object(sorted)
144 }
145 serde_json::Value::Array(items) => {
146 serde_json::Value::Array(items.into_iter().map(sort_value_keys).collect())
147 }
148 other => other,
149 }
150}
151
152pub fn serialize_notebook(notebook: &Notebook) -> Result<String, NotebookError> {
153 match notebook {
154 Notebook::V4(notebook) => {
155 let value = sort_value_keys(serde_json::to_value(notebook)?);
156 let mut buf = Vec::new();
157 let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
158 let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter);
159 value.serialize(&mut ser)?;
160
161 buf.append(&mut b"\n".to_vec());
163
164 let notebook_json = String::from_utf8(buf)
165 .map_err(|e| NotebookError::ValidationError(e.to_string()))?;
166
167 Ok(notebook_json)
168 }
169 Notebook::V4QuirksMode(_) => Err(NotebookError::ValidationError(
170 "v4.5 notebook has quirks — call V4Quirks::repair() before serializing".to_string(),
171 )),
172 Notebook::Legacy(notebook) => Err(NotebookError::UnsupportedVersion(
173 notebook.nbformat,
174 notebook.nbformat_minor,
175 )),
176 Notebook::V3(notebook) => Err(NotebookError::UnsupportedVersion(
177 notebook.nbformat,
178 notebook.nbformat_minor.unwrap_or(0),
179 )),
180 }
181}
182
183pub fn upgrade_legacy_notebook(legacy_notebook: legacy::Notebook) -> anyhow::Result<v4::Notebook> {
184 let cells: Vec<v4::Cell> = legacy_notebook
185 .cells
186 .into_iter()
187 .map(|cell: legacy::Cell| match cell {
188 legacy::Cell::Markdown {
189 id,
190 metadata,
191 source,
192 attachments,
193 } => v4::Cell::Markdown {
194 id: id.unwrap_or_else(|| uuid::Uuid::new_v4().into()),
195 metadata,
196 source,
197 attachments,
198 },
199 legacy::Cell::Code {
200 id,
201 metadata,
202 execution_count,
203 source,
204 outputs,
205 } => v4::Cell::Code {
206 id: id.unwrap_or_else(|| uuid::Uuid::new_v4().into()),
207 metadata,
208 execution_count,
209 source,
210 outputs,
211 },
212 legacy::Cell::Raw {
213 id,
214 metadata,
215 source,
216 } => v4::Cell::Raw {
217 id: id.unwrap_or_else(|| uuid::Uuid::new_v4().into()),
218 metadata,
219 source,
220 },
221 })
222 .collect();
223
224 let mut seen_ids = std::collections::HashSet::new();
226 for cell in &cells {
227 if !seen_ids.insert(cell.id()) {
228 return Err(anyhow::anyhow!("Duplicate Cell ID found: {}", cell.id()));
229 }
230 }
231
232 Ok(v4::Notebook {
233 cells,
234 metadata: legacy_notebook.metadata,
235 nbformat: 4,
236 nbformat_minor: 5,
237 })
238}
239
240pub fn upgrade_v3_notebook(v3_notebook: v3::Notebook) -> anyhow::Result<v4::Notebook> {
241 let mut all_cells: Vec<v3::Cell> = Vec::new();
242
243 if let Some(worksheets) = v3_notebook.worksheets {
244 for worksheet in worksheets {
245 all_cells.extend(worksheet.cells);
246 }
247 }
248
249 let cells: Vec<v4::Cell> = all_cells
250 .into_iter()
251 .map(|cell: v3::Cell| match cell {
252 v3::Cell::Heading {
253 level,
254 metadata,
255 source,
256 } => {
257 let heading_prefix = "#".repeat(level as usize);
258 let joined = source.join("");
261 let new_source = if joined.trim().is_empty() {
262 vec![format!("{}", heading_prefix)]
263 } else {
264 vec![format!("{} {}", heading_prefix, joined)]
265 };
266 v4::Cell::Markdown {
267 id: uuid::Uuid::new_v4().into(),
268 metadata,
269 source: new_source,
270 attachments: None,
271 }
272 }
273 v3::Cell::Markdown {
274 metadata,
275 source,
276 attachments,
277 } => v4::Cell::Markdown {
278 id: uuid::Uuid::new_v4().into(),
279 metadata,
280 source,
281 attachments,
282 },
283 v3::Cell::Code {
284 metadata,
285 prompt_number,
286 input,
287 language: _,
288 outputs,
289 } => v4::Cell::Code {
290 id: uuid::Uuid::new_v4().into(),
291 metadata,
292 execution_count: prompt_number,
293 source: input.unwrap_or_default(),
294 outputs: outputs.into_iter().map(convert_v3_output).collect(),
295 },
296 v3::Cell::Raw { metadata, source } => v4::Cell::Raw {
297 id: uuid::Uuid::new_v4().into(),
298 metadata,
299 source,
300 },
301 })
302 .collect();
303
304 let metadata = convert_v3_metadata(v3_notebook.metadata.as_ref());
307
308 Ok(v4::Notebook {
309 cells,
310 metadata,
311 nbformat: 4,
312 nbformat_minor: 5,
313 })
314}
315
316fn convert_v3_metadata(v3_metadata: Option<&serde_json::Value>) -> v4::Metadata {
317 let mut metadata = v4::Metadata::default();
318
319 if let Some(v3_metadata) = v3_metadata {
320 if let Some(obj) = v3_metadata.as_object() {
321 let language = obj
323 .get("language_info")
324 .and_then(|li| li.get("name"))
325 .and_then(|v| v.as_str())
326 .map(|s| s.to_string());
327
328 if let Some(kernel_info) = obj.get("kernel_info") {
329 if let Some(name) = kernel_info.get("name").and_then(|v| v.as_str()) {
330 metadata.kernelspec = Some(v4::KernelSpec {
331 display_name: name.to_string(),
332 name: name.to_string(),
333 language: language.clone(),
336 additional: std::collections::HashMap::new(),
337 });
338 }
339 }
340
341 if let Some(language_info) = obj.get("language_info") {
342 if let Some(name) = language_info.get("name").and_then(|v| v.as_str()) {
343 let version = language_info
344 .get("version")
345 .and_then(|v| v.as_str())
346 .map(|s| s.to_string());
347 metadata.language_info = Some(v4::LanguageInfo {
348 name: name.to_string(),
349 version,
350 codemirror_mode: None,
351 additional: std::collections::HashMap::new(),
352 });
353 }
354 }
355
356 for (key, value) in obj {
357 if key != "kernel_info" && key != "language_info" {
358 metadata.additional.insert(key.clone(), value.clone());
359 }
360 }
361 }
362 }
363
364 metadata
365}
366
367fn map_v3_media_fields(
368 fields: &serde_json::Map<String, serde_json::Value>,
369 skip_keys: &[&str],
370) -> Vec<jupyter_protocol::media::MediaType> {
371 fields
372 .iter()
373 .filter(|(k, _)| !skip_keys.contains(&k.as_str()))
374 .filter_map(|(k, v)| {
375 let content = v3::join_media_value(v)?;
376 let media_type = match k.as_str() {
377 "text" => jupyter_protocol::media::MediaType::Plain(content),
378 "html" => jupyter_protocol::media::MediaType::Html(content),
379 "png" => jupyter_protocol::media::MediaType::Png(content),
380 "jpeg" => jupyter_protocol::media::MediaType::Jpeg(content),
381 "svg" => jupyter_protocol::media::MediaType::Svg(content),
382 "latex" => jupyter_protocol::media::MediaType::Latex(content),
383 "javascript" => jupyter_protocol::media::MediaType::Javascript(content),
384 "json" => {
385 let parsed = serde_json::from_str(&content)
386 .unwrap_or(serde_json::Value::String(content));
387 return Some(jupyter_protocol::media::MediaType::Json(parsed));
388 }
389 _ => jupyter_protocol::media::MediaType::Other((
390 k.clone(),
391 serde_json::Value::String(content),
392 )),
393 };
394 Some(media_type)
395 })
396 .collect()
397}
398
399fn convert_v3_output(v3_output: v3::Output) -> v4::Output {
400 match v3_output {
401 v3::Output::Stream { name, stream, text } => v4::Output::Stream {
402 name: name.unwrap_or_else(|| stream.unwrap_or_else(|| "stdout".to_string())),
403 text: v4::MultilineString(text.join("")),
404 },
405 v3::Output::PyOut {
406 prompt_number,
407 metadata,
408 extra_fields,
409 } => {
410 let data = map_v3_media_fields(&extra_fields, &["output_type"]);
411
412 let metadata = match metadata {
413 serde_json::Value::Object(map) => map,
414 _ => serde_json::Map::new(),
415 };
416 let execution_count =
417 jupyter_protocol::ExecutionCount::new(prompt_number.unwrap_or(0).max(0) as usize);
418 v4::Output::ExecuteResult(v4::ExecuteResult {
419 execution_count,
420 data: jupyter_protocol::media::Media::new(data),
421 metadata,
422 })
423 }
424 v3::Output::DisplayData {
425 metadata: _,
426 extra_fields,
427 } => {
428 let media_vec = map_v3_media_fields(&extra_fields, &["output_type", "metadata"]);
431 v4::Output::DisplayData(v4::DisplayData {
432 data: jupyter_protocol::media::Media::new(media_vec),
433 metadata: serde_json::Map::new(),
434 })
435 }
436 v3::Output::PyErr {
437 ename,
438 evalue,
439 traceback,
440 } => v4::Output::Error(v4::ErrorOutput {
441 ename: ename.unwrap_or_default(),
442 evalue: evalue.unwrap_or_default(),
443 traceback,
444 }),
445 }
446}
447
#[cfg(test)]
mod sort_value_keys_tests {
    use super::sort_value_keys;
    use serde_json::{json, Value};

    /// Lists the keys of a JSON object in iteration order; panics on non-objects.
    fn top_level_keys(v: &Value) -> Vec<&str> {
        let object = v.as_object().expect("expected object");
        object.keys().map(|key| key.as_str()).collect()
    }

    /// Keys of the outermost object come back in ascending order.
    #[test]
    fn sorts_top_level_keys() {
        let input = json!({
            "zebra": 1,
            "apple": 2,
            "mango": 3,
        });
        let sorted = sort_value_keys(input);
        assert_eq!(top_level_keys(&sorted), vec!["apple", "mango", "zebra"]);
    }

    /// Objects nested inside other objects are sorted as well.
    #[test]
    fn sorts_nested_object_keys() {
        let input = json!({
            "outer": {
                "zebra": 1,
                "apple": 2,
            }
        });
        let sorted = sort_value_keys(input);
        let inner = sorted.get("outer").unwrap();
        assert_eq!(top_level_keys(inner), vec!["apple", "zebra"]);
    }

    /// Objects that appear as array elements are sorted individually.
    #[test]
    fn sorts_keys_inside_arrays() {
        let input = json!({
            "cells": [
                { "zebra": 1, "apple": 2 },
                { "mango": 3, "banana": 4 },
            ]
        });
        let sorted = sort_value_keys(input);
        let cells = sorted.get("cells").unwrap().as_array().unwrap();
        let expected = [vec!["apple", "zebra"], vec!["banana", "mango"]];
        assert_eq!(top_level_keys(&cells[0]), expected[0]);
        assert_eq!(top_level_keys(&cells[1]), expected[1]);
    }

    /// Sorting applies to object keys only; array element order is untouched.
    #[test]
    fn preserves_array_element_order() {
        let input = json!({
            "list": [3, 1, 2],
        });
        let sorted = sort_value_keys(input);
        let values: Vec<i64> = sorted
            .get("list")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_i64().unwrap())
            .collect();
        assert_eq!(values, vec![3, 1, 2]);
    }

    /// Null, booleans, numbers and strings pass through unchanged.
    #[test]
    fn leaves_scalars_untouched() {
        for scalar in [json!(null), json!(true), json!(42), json!("hello")] {
            assert_eq!(sort_value_keys(scalar.clone()), scalar);
        }
    }
}