// nbwipers 0.5.1
//
// Wipe clean your Jupyter Notebooks!
// Documentation
use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_with::skip_serializing_none;

// The schema declarations in this file are taken from Ruff, used under the MIT license

/// The root of the JSON of a Jupyter Notebook
///
/// Generated by <https://app.quicktype.io/> from
/// <https://github.com/jupyter/nbformat/blob/16b53251aabf472ad9406ddb1f78b0421c014eeb/nbformat/v4/nbformat.v4.schema.json>
/// Jupyter Notebook v4.5 JSON schema.
///
/// Because of `deny_unknown_fields`, deserialization fails if the top-level JSON object
/// contains any key other than the four fields declared here.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct RawNotebook {
    /// Array of cells of the current notebook.
    pub cells: Vec<Cell>,
    /// Notebook root-level metadata.
    ///
    /// Held as an untyped JSON [`Value`], so its contents are not checked against any schema
    /// by this type.
    pub metadata: Value,
    /// Notebook format (major number). Incremented between backwards incompatible changes to the
    /// notebook format.
    pub nbformat: i64,
    /// Notebook format (minor number). Incremented for backward compatible changes to the
    /// notebook format.
    pub nbformat_minor: i64,
}

impl RawNotebook {
    /// Builds a notebook with no cells, JSON `null` metadata, and format version 4.5
    /// (`nbformat = 4`, `nbformat_minor = 5`).
    ///
    /// NOTE(review): the nbformat schema describes root metadata as an object; confirm that
    /// starting from `null` is intended before relying on the serialized form of a fresh
    /// notebook.
    pub fn new() -> Self {
        let cells = Vec::new();
        let metadata = Value::Null;
        let (nbformat, nbformat_minor) = (4, 5);

        Self {
            cells,
            metadata,
            nbformat,
            nbformat_minor,
        }
    }
}

impl Default for RawNotebook {
    fn default() -> Self {
        Self::new()
    }
}

/// A notebook cell, discriminated by the string in its `cell_type` JSON field.
///
/// The enum is internally tagged: the `cell_type` key lives alongside the cell's own fields,
/// and its value is the lowercase variant name (`"code"`, `"markdown"` or `"raw"`).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "cell_type", rename_all = "lowercase")]
pub enum Cell {
    /// A cell with `"cell_type": "code"`.
    Code(CodeCell),
    /// A cell with `"cell_type": "markdown"`.
    Markdown(MarkdownCell),
    /// A cell with `"cell_type": "raw"`.
    Raw(RawCell),
}

/// Notebook raw nbconvert cell.
///
/// `skip_serializing_none` makes every `Option` field that is `None` disappear from the
/// serialized output instead of being written as `null`; the attribute has to stay above the
/// `derive` for that to take effect. `deny_unknown_fields` makes deserialization fail on any
/// key not declared below.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct RawCell {
    /// Optional attachments, kept as untyped JSON. Omitted from the output when `None`.
    pub attachments: Option<Value>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Contents of the cell, either a single string or an array of strings
    /// (see [`SourceValue`]).
    pub source: SourceValue,
}

/// Notebook markdown cell.
///
/// `skip_serializing_none` makes every `Option` field that is `None` disappear from the
/// serialized output instead of being written as `null`; the attribute has to stay above the
/// `derive` for that to take effect. `deny_unknown_fields` makes deserialization fail on any
/// key not declared below.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct MarkdownCell {
    /// Optional attachments, kept as untyped JSON. Omitted from the output when `None`.
    pub attachments: Option<Value>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Contents of the cell, either a single string or an array of strings
    /// (see [`SourceValue`]).
    pub source: SourceValue,
}

/// Notebook code cell.
///
/// Unlike the raw and markdown cell structs, this one does not use `skip_serializing_none`:
/// only `id` is skipped when absent, so a `None` `execution_count` is still written out as
/// `null`. `deny_unknown_fields` makes deserialization fail on any key not declared below.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(deny_unknown_fields)]
pub struct CodeCell {
    /// The code cell's prompt number. Will be null if the cell has not been run.
    pub execution_count: Option<i64>,
    /// Technically, id isn't required (it's not even present) in schema v4.0 through v4.4, but
    /// it's required in v4.5. Main issue is that pycharm creates notebooks without an id
    /// <https://youtrack.jetbrains.com/issue/PY-59438/Jupyter-notebooks-created-with-PyCharm-are-missing-the-id-field-in-cells-in-the-.ipynb-json>
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    /// Cell-level metadata.
    pub metadata: Value,
    /// Execution, display, or stream outputs.
    ///
    /// Each output is kept as an untyped JSON [`Value`]; its structure is not modelled here.
    pub outputs: Vec<Value>,
    /// Contents of the cell, either a single string or an array of strings
    /// (see [`SourceValue`]).
    pub source: SourceValue,
}

/// Text that the notebook format stores either as one string or as an array of strings.
///
/// The generated schema uses this one shape for several things: mimetype output
/// (e.g. text/plain), the contents of a cell, and a stream's text output.
///
/// The enum is `untagged`, so no variant name appears in the JSON: a JSON string
/// deserializes to [`SourceValue::String`] and a JSON array of strings to
/// [`SourceValue::StringArray`]. Variants are tried in declaration order.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum SourceValue {
    /// The whole text as a single JSON string.
    String(String),
    /// The text split into a JSON array of strings.
    StringArray(Vec<String>),
}

// `unwrap` is acceptable here: a failed unwrap simply fails the test.
#[allow(clippy::unwrap_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a notebook holding one code, one raw and one markdown cell, then checks the
    /// cell-kind accessors, both `source` representations, and the code cell's clearing helpers.
    #[test]
    fn test_cell_types() {
        // The hash-doubled raw-string delimiter is required: the markdown source contains `"#`.
        let notebook_json = r##"{
            "cells": [
                {
                    "cell_type": "code",
                    "execution_count": 123,
                    "metadata": {},
                    "outputs": [],
                    "source": [
                        "print(\"hello world\")",
                        "print(\"goodbye\")"
                    ]
                },
                {
                    "cell_type": "raw",
                    "metadata": {},
                    "source": [
                        "I am a raw cell"
                    ]
                },
                {
                    "cell_type": "markdown",
                    "metadata": {},
                    "source": "# Welcome to the documentation"

                }
            ],
            "metadata": {},
            "nbformat": 4,
            "nbformat_minor": 5
        }"##;

        let mut notebook: RawNotebook = serde_json::from_str(notebook_json).unwrap();

        // Exactly three cells are expected, the last of which must be markdown.
        let [first, second, Cell::Markdown(md_cell)] = &mut notebook.cells[..] else {
            panic!();
        };

        // Cell kinds: the first is a code cell, the second is raw and therefore not a code cell.
        let code_cell = first.as_codecell_mut().unwrap();
        assert!(second.as_codecell().is_none());
        assert!(matches!(second, Cell::Raw(_)));

        // The raw cell's source was given as an array, the markdown cell's as a plain string.
        assert!(matches!(second.get_source(), SourceValue::StringArray(_)));
        assert!(matches!(md_cell.source, SourceValue::String(_)));

        // Before clearing: outputs are already empty, but an execution count is present.
        assert!(code_cell.is_clear_outputs());
        assert!(!code_cell.is_clear_exec_count());
        assert!(code_cell.should_clear_output(true, true));

        // After clearing both, the cell reports outputs and execution count as clear.
        code_cell.clear_counts();
        code_cell.clear_outputs();
        assert!(code_cell.is_clear_outputs());
        assert!(code_cell.is_clear_exec_count());
    }
}