use crate::cell::{Cell, CellMetadata};
use crate::metadata::{KernelSpec, NotebookMetadata};
use crate::notebook::Notebook;
use crate::output::MultilineString;
use crate::{ParseError, SerializeError};
use indexmap::IndexMap;
#[derive(Debug, Clone, Default)]
pub struct PercentOptions {
pub header_style: HeaderStyle,
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeaderStyle {
#[default]
Full,
Minimal,
None,
}
pub fn parse(input: &str) -> Result<Notebook, ParseError> {
let (metadata, content) = parse_header(input)?;
let cells = parse_cells(content)?;
Ok(Notebook {
cells,
metadata,
nbformat: 4,
nbformat_minor: 5,
sort_keys: false,
})
}
pub fn serialize(notebook: &Notebook) -> Result<String, SerializeError> {
serialize_with_options(notebook, &PercentOptions::default())
}
pub fn serialize_with_options(
notebook: &Notebook,
options: &PercentOptions,
) -> Result<String, SerializeError> {
let mut output = String::new();
if options.header_style != HeaderStyle::None {
let header = serialize_header(¬ebook.metadata, options.header_style)?;
if !header.is_empty() {
output.push_str(&header);
output.push('\n');
}
}
for (i, cell) in notebook.cells.iter().enumerate() {
if i > 0 || !output.is_empty() {
if !output.is_empty() && !output.ends_with("\n\n") {
output.push('\n');
}
}
match cell {
Cell::Code {
source, metadata, ..
} => {
output.push_str(&format_cell_delimiter("", metadata));
output.push('\n');
let src = source.as_string();
if !src.is_empty() {
output.push_str(&src);
if !src.ends_with('\n') {
output.push('\n');
}
}
}
Cell::Markdown {
source, metadata, ..
} => {
output.push_str(&format_cell_delimiter("[markdown]", metadata));
output.push('\n');
let src = source.as_string();
if !src.is_empty() {
output.push_str(&comment_lines(&src));
if !output.ends_with('\n') {
output.push('\n');
}
}
}
Cell::Raw {
source, metadata, ..
} => {
output.push_str(&format_cell_delimiter("[raw]", metadata));
output.push('\n');
let src = source.as_string();
if !src.is_empty() {
output.push_str(&comment_lines(&src));
if !output.ends_with('\n') {
output.push('\n');
}
}
}
}
}
Ok(output)
}
fn parse_header(input: &str) -> Result<(NotebookMetadata, &str), ParseError> {
let trimmed = input.trim_start();
if !trimmed.starts_with("# ---") {
return Ok((default_metadata(), input));
}
let header_start = input.find("# ---").unwrap();
let after_first_marker = &input[header_start + 5..];
let header_end =
after_first_marker
.find("\n# ---")
.ok_or_else(|| ParseError::InvalidStructure {
message: "YAML header not closed (missing '# ---')".to_string(),
})?;
let header_content = &after_first_marker[..header_end];
let remaining = &after_first_marker[header_end + 6..];
let remaining = remaining.trim_start_matches('\n');
let metadata = parse_yaml_header(header_content)?;
Ok((metadata, remaining))
}
fn parse_yaml_header(content: &str) -> Result<NotebookMetadata, ParseError> {
let yaml_lines: Vec<&str> = content
.lines()
.map(|line| {
if let Some(stripped) = line.strip_prefix("# ") {
stripped
} else if let Some(stripped) = line.strip_prefix("#") {
stripped
} else {
line
}
})
.collect();
let yaml_str = yaml_lines.join("\n");
parse_simple_yaml_metadata(&yaml_str)
}
fn parse_simple_yaml_metadata(yaml: &str) -> Result<NotebookMetadata, ParseError> {
let mut metadata = NotebookMetadata::default();
let mut kernelspec_name = None;
let mut kernelspec_display_name = None;
let mut kernelspec_language = None;
let mut in_kernelspec = false;
for line in yaml.lines() {
let trimmed = line.trim();
if trimmed.starts_with("kernelspec:") {
in_kernelspec = true;
continue;
}
if in_kernelspec {
if !line.starts_with(" ") && !line.starts_with("\t") && !trimmed.is_empty() {
in_kernelspec = false;
} else if let Some(rest) = trimmed.strip_prefix("name:") {
kernelspec_name = Some(rest.trim().to_string());
} else if let Some(rest) = trimmed.strip_prefix("display_name:") {
kernelspec_display_name = Some(rest.trim().to_string());
} else if let Some(rest) = trimmed.strip_prefix("language:") {
kernelspec_language = Some(rest.trim().to_string());
}
}
}
if kernelspec_name.is_some() || kernelspec_display_name.is_some() {
metadata.kernelspec = Some(KernelSpec {
name: kernelspec_name.unwrap_or_else(|| "python3".to_string()),
display_name: kernelspec_display_name.unwrap_or_else(|| "Python 3".to_string()),
language: kernelspec_language.unwrap_or_else(|| "python".to_string()),
});
}
Ok(metadata)
}
fn default_metadata() -> NotebookMetadata {
NotebookMetadata {
kernelspec: Some(KernelSpec {
name: "python3".to_string(),
display_name: "Python 3".to_string(),
language: "python".to_string(),
}),
language_info: None,
extra: IndexMap::new(),
}
}
fn serialize_header(
metadata: &NotebookMetadata,
style: HeaderStyle,
) -> Result<String, SerializeError> {
if style == HeaderStyle::None {
return Ok(String::new());
}
let mut lines = vec!["# ---".to_string()];
if let Some(ref kernelspec) = metadata.kernelspec {
lines.push("# jupyter:".to_string());
lines.push("# kernelspec:".to_string());
lines.push(format!("# display_name: {}", kernelspec.display_name));
lines.push(format!("# language: {}", kernelspec.language));
lines.push(format!("# name: {}", kernelspec.name));
}
lines.push("# ---".to_string());
Ok(lines.join("\n"))
}
fn parse_cells(content: &str) -> Result<Vec<Cell>, ParseError> {
let mut cells = Vec::new();
let parts: Vec<&str> = split_on_cell_markers(content);
for part in parts {
if part.trim().is_empty() {
continue;
}
let cell = parse_cell(part)?;
cells.push(cell);
}
Ok(cells)
}
fn split_on_cell_markers(content: &str) -> Vec<&str> {
let mut parts = Vec::new();
let mut last_end = 0;
for (i, _) in content.match_indices("\n# %%") {
if last_end < i {
let part = &content[last_end..i];
if !part.trim().is_empty() {
parts.push(part);
}
}
last_end = i + 1; }
if content.starts_with("# %%") {
if last_end == 0 {
parts.push(content);
} else {
if last_end < content.len() {
parts.push(&content[last_end..]);
}
}
} else if last_end == 0 {
if !content.trim().is_empty() {
parts.push(content);
}
} else {
if last_end < content.len() {
parts.push(&content[last_end..]);
}
}
parts
}
fn parse_cell(content: &str) -> Result<Cell, ParseError> {
let content = content.trim_start_matches('\n');
if !content.starts_with("# %%") {
return Ok(Cell::Code {
source: MultilineString::from_string(content.to_string()),
execution_count: None,
outputs: Vec::new(),
metadata: CellMetadata::default(),
id: None,
});
}
let first_newline = content.find('\n').unwrap_or(content.len());
let marker_line = &content[..first_newline];
let cell_content = if first_newline < content.len() {
&content[first_newline + 1..]
} else {
""
};
let (cell_type, metadata) = parse_cell_marker(marker_line)?;
match cell_type {
CellType::Code => {
let source = cell_content.trim_end_matches('\n').to_string();
Ok(Cell::Code {
source: MultilineString::from_string(source),
execution_count: None,
outputs: Vec::new(),
metadata,
id: None,
})
}
CellType::Markdown => {
let source = uncomment_lines(cell_content);
Ok(Cell::Markdown {
source: MultilineString::from_string(source),
metadata,
id: None,
})
}
CellType::Raw => {
let source = uncomment_lines(cell_content);
Ok(Cell::Raw {
source: MultilineString::from_string(source),
metadata,
id: None,
})
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CellType {
Code,
Markdown,
Raw,
}
fn parse_cell_marker(line: &str) -> Result<(CellType, CellMetadata), ParseError> {
let line = line.trim();
let rest = line
.strip_prefix("# %%")
.ok_or_else(|| ParseError::InvalidStructure {
message: format!("Invalid cell marker: {}", line),
})?
.trim();
let (cell_type, rest) = if rest.starts_with("[markdown]") {
(
CellType::Markdown,
rest.strip_prefix("[markdown]").unwrap().trim(),
)
} else if rest.starts_with("[md]") {
(
CellType::Markdown,
rest.strip_prefix("[md]").unwrap().trim(),
)
} else if rest.starts_with("[raw]") {
(CellType::Raw, rest.strip_prefix("[raw]").unwrap().trim())
} else {
(CellType::Code, rest)
};
let metadata = parse_cell_metadata(rest)?;
Ok((cell_type, metadata))
}
fn parse_cell_metadata(content: &str) -> Result<CellMetadata, ParseError> {
let mut metadata = CellMetadata::default();
if content.is_empty() {
return Ok(metadata);
}
if let Some(tags_start) = content.find("tags=") {
let rest = &content[tags_start + 5..];
if rest.starts_with('[') {
if let Some(end) = rest.find(']') {
let tags_str = &rest[1..end];
let tags: Vec<String> = tags_str
.split(',')
.map(|s| s.trim().trim_matches('"').trim_matches('\'').to_string())
.filter(|s| !s.is_empty())
.collect();
if !tags.is_empty() {
metadata.tags = Some(tags);
}
}
}
}
Ok(metadata)
}
fn comment_lines(content: &str) -> String {
content
.lines()
.map(|line| {
if line.is_empty() {
"#".to_string()
} else {
format!("# {}", line)
}
})
.collect::<Vec<_>>()
.join("\n")
}
fn uncomment_lines(content: &str) -> String {
let lines: Vec<&str> = content.lines().collect();
let mut end = lines.len();
while end > 0 && (lines[end - 1] == "#" || lines[end - 1] == "# " || lines[end - 1].is_empty())
{
end -= 1;
}
lines[..end]
.iter()
.map(|line| {
if let Some(stripped) = line.strip_prefix("# ") {
stripped
} else if *line == "#" {
""
} else if let Some(stripped) = line.strip_prefix("#") {
stripped
} else {
*line
}
})
.collect::<Vec<_>>()
.join("\n")
}
fn format_cell_delimiter(cell_type_marker: &str, metadata: &CellMetadata) -> String {
let mut result = if cell_type_marker.is_empty() {
"# %%".to_string()
} else {
format!("# %% {}", cell_type_marker)
};
if let Some(ref tags) = metadata.tags {
if !tags.is_empty() {
let tags_str: Vec<String> = tags.iter().map(|t| format!("\"{}\"", t)).collect();
result.push_str(&format!(" tags=[{}]", tags_str.join(", ")));
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_minimal() {
let input = "# %%\nprint('hello')";
let notebook = parse(input).unwrap();
assert_eq!(notebook.cells.len(), 1);
assert!(notebook.cells[0].is_code());
assert_eq!(notebook.cells[0].source_string(), "print('hello')");
}
#[test]
fn test_parse_markdown_cell() {
let input = "# %% [markdown]\n# # Title\n# Some text";
let notebook = parse(input).unwrap();
assert_eq!(notebook.cells.len(), 1);
assert!(notebook.cells[0].is_markdown());
assert_eq!(notebook.cells[0].source_string(), "# Title\nSome text");
}
#[test]
fn test_parse_mixed_cells() {
let input = r#"# %% [markdown]
# # Hello
# %%
print('world')
# %% [raw]
# raw content
"#;
let notebook = parse(input).unwrap();
assert_eq!(notebook.cells.len(), 3);
assert!(notebook.cells[0].is_markdown());
assert!(notebook.cells[1].is_code());
assert!(notebook.cells[2].is_raw());
}
#[test]
fn test_parse_with_header() {
let input = r#"# ---
# jupyter:
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# %%
print('hello')
"#;
let notebook = parse(input).unwrap();
assert_eq!(notebook.cells.len(), 1);
let ks = notebook.metadata.kernelspec.as_ref().unwrap();
assert_eq!(ks.name, "python3");
assert_eq!(ks.display_name, "Python 3");
assert_eq!(ks.language, "python");
}
#[test]
fn test_parse_no_header() {
let input = "# %%\nprint('hello')";
let notebook = parse(input).unwrap();
let ks = notebook.metadata.kernelspec.as_ref().unwrap();
assert_eq!(ks.name, "python3");
}
#[test]
fn test_parse_cell_with_tags() {
let input = "# %% [markdown] tags=[\"hide\", \"skip\"]\n# Content";
let notebook = parse(input).unwrap();
let tags = notebook.cells[0].metadata().tags.as_ref().unwrap();
assert_eq!(tags, &vec!["hide".to_string(), "skip".to_string()]);
}
#[test]
fn test_comment_lines() {
assert_eq!(comment_lines("hello\nworld"), "# hello\n# world");
assert_eq!(comment_lines("line1\n\nline2"), "# line1\n#\n# line2");
assert_eq!(comment_lines(""), "");
}
#[test]
fn test_uncomment_lines() {
assert_eq!(uncomment_lines("# hello\n# world"), "hello\nworld");
assert_eq!(uncomment_lines("# line1\n#\n# line2"), "line1\n\nline2");
assert_eq!(uncomment_lines("#hello"), "hello");
}
#[test]
fn test_serialize_minimal() {
let mut notebook = Notebook::new();
notebook.cells.push(Cell::code("print('hello')"));
let output = serialize(¬ebook).unwrap();
assert!(output.contains("# %%"));
assert!(output.contains("print('hello')"));
}
#[test]
fn test_serialize_markdown() {
let mut notebook = Notebook::new();
notebook.cells.push(Cell::markdown("# Title\nSome text"));
let output = serialize(¬ebook).unwrap();
assert!(output.contains("# %% [markdown]"));
assert!(output.contains("# # Title"));
assert!(output.contains("# Some text"));
}
#[test]
fn test_serialize_no_header() {
let mut notebook = Notebook::new();
notebook.cells.push(Cell::code("x = 1"));
let options = PercentOptions {
header_style: HeaderStyle::None,
};
let output = serialize_with_options(¬ebook, &options).unwrap();
assert!(!output.contains("# ---"));
assert!(output.starts_with("# %%"));
}
#[test]
fn test_round_trip() {
let input = r#"# ---
# jupyter:
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# %% [markdown]
# # Title
# Some text
# %%
print('hello')
x = 1
# %% [raw]
# Raw content
"#;
let notebook = parse(input).unwrap();
let output = serialize(¬ebook).unwrap();
let reparsed = parse(&output).unwrap();
assert_eq!(notebook.cells.len(), reparsed.cells.len());
for (orig, new) in notebook.cells.iter().zip(reparsed.cells.iter()) {
assert_eq!(orig.source_string(), new.source_string());
}
}
#[test]
fn test_empty_markdown_lines() {
let input = "# %% [markdown]\n# Line 1\n#\n# Line 2";
let notebook = parse(input).unwrap();
assert_eq!(notebook.cells[0].source_string(), "Line 1\n\nLine 2");
}
#[test]
fn test_md_shorthand() {
let input = "# %% [md]\n# Content";
let notebook = parse(input).unwrap();
assert!(notebook.cells[0].is_markdown());
assert_eq!(notebook.cells[0].source_string(), "Content");
}
}