// entrenar/research/notebook/exporter.rs

1//! Notebook exporter for Jupyter format.
2
3use serde::{Deserialize, Serialize};
4use serde_json::json;
5
6use super::cell::{Cell, CellType};
7use super::kernel::KernelSpec;
8use crate::research::literate::LiterateDocument;
9
10/// Notebook exporter
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct NotebookExporter {
13    /// Notebook cells
14    pub cells: Vec<Cell>,
15    /// Kernel specification
16    pub kernel: KernelSpec,
17    /// Notebook metadata
18    pub metadata: NotebookMetadata,
19}
20
21impl Default for NotebookExporter {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl NotebookExporter {
28    /// Create a new notebook exporter
29    pub fn new() -> Self {
30        Self {
31            cells: Vec::new(),
32            kernel: KernelSpec::python3(),
33            metadata: NotebookMetadata::default(),
34        }
35    }
36
37    /// Create with a specific kernel
38    pub fn with_kernel(kernel: KernelSpec) -> Self {
39        Self { cells: Vec::new(), kernel, metadata: NotebookMetadata::default() }
40    }
41
42    /// Add a cell to the notebook
43    pub fn add_cell(&mut self, cell: Cell) {
44        self.cells.push(cell);
45    }
46
47    /// Add a code cell
48    pub fn add_code(&mut self, source: impl Into<String>) {
49        self.cells.push(Cell::code(source));
50    }
51
52    /// Add a markdown cell
53    pub fn add_markdown(&mut self, source: impl Into<String>) {
54        self.cells.push(Cell::markdown(source));
55    }
56
57    /// Create from a literate document
58    pub fn from_literate(doc: &LiterateDocument) -> Self {
59        let mut exporter = Self::new();
60
61        // Determine kernel from document type
62        if doc.is_typst() || doc.is_markdown() {
63            // Extract code blocks and intersperse with markdown
64            let content = doc.content();
65            let blocks = doc.extract_code_blocks();
66
67            // Detect primary language for kernel selection
68            let primary_lang = blocks.iter().find_map(|b| b.language.as_ref()).map(String::as_str);
69
70            exporter.kernel = match primary_lang {
71                Some("rust") => KernelSpec::evcxr(),
72                Some("julia") => KernelSpec::julia(),
73                Some(other_lang) => {
74                    eprintln!(
75                        "Warning: unsupported kernel language '{other_lang}', defaulting to Python 3"
76                    );
77                    KernelSpec::python3()
78                }
79                None => KernelSpec::python3(),
80            };
81
82            // Simple parsing: add everything before first code block as markdown
83            // then alternate between code and markdown
84            let mut last_end = 0;
85
86            for block in &blocks {
87                // Find the start of this code block in the content
88                let block_pattern = format!("```{}", block.language.as_deref().unwrap_or(""));
89                if let Some(start_pos) = content[last_end..].find(&block_pattern) {
90                    let absolute_start = last_end + start_pos;
91
92                    // Add markdown before this block
93                    let markdown_content = &content[last_end..absolute_start];
94                    let trimmed = markdown_content.trim();
95                    if !trimmed.is_empty() {
96                        exporter.add_markdown(trimmed);
97                    }
98
99                    // Add the code block
100                    exporter.add_code(&block.content);
101
102                    // Find the end of this code block
103                    let code_end = content[absolute_start..]
104                        .find("```\n")
105                        .or_else(|| content[absolute_start..].find("```"))
106                        .map_or(content.len(), |p| {
107                            absolute_start
108                                + p
109                                + content[absolute_start + p..].find('\n').unwrap_or(3)
110                                + 1
111                        });
112
113                    last_end = code_end.min(content.len());
114                }
115            }
116
117            // Add remaining markdown after last code block
118            if last_end < content.len() {
119                let remaining = content[last_end..].trim();
120                if !remaining.is_empty() {
121                    exporter.add_markdown(remaining);
122                }
123            }
124        } else {
125            // Raw text: add as single markdown cell
126            exporter.add_markdown(doc.content());
127        }
128
129        exporter
130    }
131
132    /// Export to Jupyter notebook JSON format
133    pub fn to_ipynb(&self) -> String {
134        let notebook = json!({
135            "nbformat": 4,
136            "nbformat_minor": 5,
137            "metadata": {
138                "kernelspec": {
139                    "display_name": self.kernel.display_name,
140                    "language": self.kernel.language,
141                    "name": self.kernel.name
142                },
143                "language_info": {
144                    "name": self.kernel.language
145                }
146            },
147            "cells": self.cells.iter().map(|cell| {
148                let mut cell_json = json!({
149                    "cell_type": cell.cell_type.to_string(),
150                    "source": cell.source,
151                    "metadata": cell.metadata
152                });
153
154                if cell.cell_type == CellType::Code {
155                    cell_json["outputs"] = json!(cell.outputs.iter().map(|o| {
156                        let mut out = json!({
157                            "output_type": o.output_type
158                        });
159                        if let Some(data) = &o.data {
160                            out["data"] = data.clone();
161                        }
162                        if let Some(text) = &o.text {
163                            out["text"] = json!(text);
164                            out["name"] = json!("stdout");
165                        }
166                        out
167                    }).collect::<Vec<_>>());
168                    cell_json["execution_count"] = json!(cell.execution_count);
169                }
170
171                cell_json
172            }).collect::<Vec<_>>()
173        });
174
175        serde_json::to_string_pretty(&notebook).unwrap_or_else(|_err| "{}".to_string())
176    }
177
178    /// Get the number of cells
179    pub fn cell_count(&self) -> usize {
180        self.cells.len()
181    }
182
183    /// Get code cells only
184    pub fn code_cells(&self) -> Vec<&Cell> {
185        self.cells.iter().filter(|c| c.cell_type == CellType::Code).collect()
186    }
187
188    /// Get markdown cells only
189    pub fn markdown_cells(&self) -> Vec<&Cell> {
190        self.cells.iter().filter(|c| c.cell_type == CellType::Markdown).collect()
191    }
192}
193
194/// Notebook metadata
195#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
196pub struct NotebookMetadata {
197    /// Notebook title
198    #[serde(skip_serializing_if = "Option::is_none")]
199    pub title: Option<String>,
200    /// Authors
201    #[serde(skip_serializing_if = "Vec::is_empty", default)]
202    pub authors: Vec<String>,
203}
204
#[cfg(test)]
mod tests {
    use super::super::cell::CellOutput;
    use super::*;

    /// Drives `from_literate` with one document per kernel-selection arm so the
    /// real selection logic is exercised rather than a copy of it.
    #[test]
    fn test_kernel_selection_all_language_variants() {
        // (language on the code fence, expected kernel language)
        let cases: &[(Option<&str>, &str)] = &[
            (Some("rust"), "rust"),
            (Some("julia"), "julia"),
            (Some("javascript"), "python"),
            (None, "python"),
        ];

        for (lang, expected) in cases {
            let source = match lang {
                Some(lang) => format!("# Doc\n\n```{lang}\ncode\n```\n"),
                // No code block at all: no language to detect.
                None => "# Doc\n\nNo code here.".to_string(),
            };
            let doc = LiterateDocument::parse_markdown(source.as_str());
            let exporter = NotebookExporter::from_literate(&doc);
            assert_eq!(exporter.kernel.language, *expected, "fence language: {lang:?}");
        }
    }

    #[test]
    fn test_notebook_exporter_new() {
        let exporter = NotebookExporter::new();
        assert_eq!(exporter.cells.len(), 0);
        assert_eq!(exporter.kernel.language, "python");
    }

    #[test]
    fn test_notebook_exporter_default() {
        let exporter = NotebookExporter::default();
        assert_eq!(exporter.cell_count(), 0);
    }

    #[test]
    fn test_add_code_and_markdown() {
        let mut exporter = NotebookExporter::new();
        exporter.add_code("print('hello')");
        exporter.add_markdown("# Title");
        assert_eq!(exporter.cell_count(), 2);
        assert_eq!(exporter.code_cells().len(), 1);
        assert_eq!(exporter.markdown_cells().len(), 1);
    }

    #[test]
    fn test_notebook_metadata_default() {
        let meta = NotebookMetadata::default();
        assert!(meta.title.is_none());
        assert!(meta.authors.is_empty());
    }

    // ── Additional coverage tests ─────────────────────────────────

    #[test]
    fn test_notebook_exporter_with_kernel() {
        let exporter = NotebookExporter::with_kernel(KernelSpec::evcxr());
        assert_eq!(exporter.kernel.language, "rust");
        assert_eq!(exporter.cell_count(), 0);
    }

    #[test]
    fn test_notebook_exporter_with_julia_kernel() {
        let exporter = NotebookExporter::with_kernel(KernelSpec::julia());
        assert_eq!(exporter.kernel.language, "julia");
        assert_eq!(exporter.kernel.display_name, "Julia 1.9");
    }

    #[test]
    fn test_add_cell_directly() {
        let mut exporter = NotebookExporter::new();
        exporter.add_cell(Cell::code("x = 1"));
        exporter.add_cell(Cell::markdown("# Hello"));
        exporter.add_cell(Cell::raw("raw text"));
        assert_eq!(exporter.cell_count(), 3);
        assert_eq!(exporter.code_cells().len(), 1);
        assert_eq!(exporter.markdown_cells().len(), 1);
    }

    #[test]
    fn test_to_ipynb_empty() {
        let exporter = NotebookExporter::new();
        let json = exporter.to_ipynb();
        assert!(json.contains("nbformat"));
        assert!(json.contains("\"cells\": []"));
    }

    #[test]
    fn test_to_ipynb_with_cells() {
        let mut exporter = NotebookExporter::new();
        exporter.add_code("print('hello')");
        exporter.add_markdown("# Title");
        let json = exporter.to_ipynb();
        assert!(json.contains("print('hello')"));
        assert!(json.contains("# Title"));
        assert!(json.contains("\"cell_type\": \"code\""));
        assert!(json.contains("\"cell_type\": \"markdown\""));
    }

    #[test]
    fn test_to_ipynb_kernelspec() {
        let exporter = NotebookExporter::with_kernel(KernelSpec::evcxr());
        let json = exporter.to_ipynb();
        assert!(json.contains("\"language\": \"rust\""));
        assert!(json.contains("Rust"));
    }

    #[test]
    fn test_to_ipynb_code_cell_has_outputs_and_execution_count() {
        let mut exporter = NotebookExporter::new();
        exporter.add_code("1 + 1");
        let json = exporter.to_ipynb();
        assert!(json.contains("\"outputs\""));
        assert!(json.contains("\"execution_count\""));
    }

    #[test]
    fn test_to_ipynb_with_output() {
        let mut exporter = NotebookExporter::new();
        let cell = Cell::code("print(42)")
            .with_output(CellOutput::stream("stdout", "42\n"))
            .with_execution_count(1);
        exporter.add_cell(cell);
        let json = exporter.to_ipynb();
        assert!(json.contains("stream"));
        assert!(json.contains("42"));
        assert!(json.contains("stdout"));
    }

    #[test]
    fn test_to_ipynb_with_execute_result() {
        let mut exporter = NotebookExporter::new();
        let data = serde_json::json!({"text/plain": ["result"]});
        let cell = Cell::code("1 + 1").with_output(CellOutput::execute_result(data));
        exporter.add_cell(cell);
        let json = exporter.to_ipynb();
        assert!(json.contains("execute_result"));
        assert!(json.contains("text/plain"));
    }

    #[test]
    fn test_from_literate_markdown() {
        let doc = LiterateDocument::parse_markdown(
            "# Hello\n\nSome text.\n\n```python\nprint('hi')\n```\n\nMore text.",
        );
        let exporter = NotebookExporter::from_literate(&doc);
        assert!(exporter.cell_count() > 0);
        // Should have at least one code cell and one markdown cell
        assert!(!exporter.code_cells().is_empty());
        assert!(!exporter.markdown_cells().is_empty());
        assert_eq!(exporter.kernel.language, "python");
    }

    #[test]
    fn test_from_literate_rust_kernel() {
        let doc =
            LiterateDocument::parse_markdown("# Rust Example\n\n```rust\nfn main() {}\n```\n");
        let exporter = NotebookExporter::from_literate(&doc);
        assert_eq!(exporter.kernel.language, "rust");
    }

    #[test]
    fn test_from_literate_julia_kernel() {
        let doc =
            LiterateDocument::parse_markdown("# Julia Example\n\n```julia\nprintln(\"hi\")\n```\n");
        let exporter = NotebookExporter::from_literate(&doc);
        assert_eq!(exporter.kernel.language, "julia");
    }

    #[test]
    fn test_from_literate_no_code_blocks() {
        let doc = LiterateDocument::parse_markdown("# Just Markdown\n\nNo code here.");
        let exporter = NotebookExporter::from_literate(&doc);
        // Should have at least some markdown content
        assert!(exporter.cell_count() >= 1);
        assert!(exporter.code_cells().is_empty());
    }

    #[test]
    fn test_from_literate_raw_text() {
        let doc = LiterateDocument::raw("Just plain text, no special parsing.");
        let exporter = NotebookExporter::from_literate(&doc);
        assert_eq!(exporter.cell_count(), 1);
        assert_eq!(exporter.markdown_cells().len(), 1);
    }

    #[test]
    fn test_from_literate_multiple_code_blocks() {
        let doc = LiterateDocument::parse_markdown(
            "Intro\n\n```python\nx = 1\n```\n\nMiddle text\n\n```python\ny = 2\n```\n\nEnd",
        );
        let exporter = NotebookExporter::from_literate(&doc);
        assert_eq!(exporter.code_cells().len(), 2);
    }

    #[test]
    fn test_notebook_metadata_with_values() {
        let meta = NotebookMetadata {
            title: Some("My Notebook".to_string()),
            authors: vec!["Author A".to_string(), "Author B".to_string()],
        };
        assert_eq!(meta.title.as_deref(), Some("My Notebook"));
        assert_eq!(meta.authors.len(), 2);
    }

    #[test]
    fn test_notebook_metadata_serialization() {
        let meta = NotebookMetadata {
            title: Some("Test".to_string()),
            authors: vec!["Alice".to_string()],
        };
        let json = serde_json::to_string(&meta).expect("serialize");
        let restored: NotebookMetadata = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(restored.title, meta.title);
        assert_eq!(restored.authors, meta.authors);
    }

    #[test]
    fn test_notebook_metadata_serialization_skip_empty() {
        let meta = NotebookMetadata::default();
        let json = serde_json::to_string(&meta).expect("serialize");
        // title should be omitted (skip_serializing_if = "Option::is_none")
        assert!(!json.contains("title"));
        // authors should be omitted (skip_serializing_if = "Vec::is_empty")
        assert!(!json.contains("authors"));
    }

    #[test]
    fn test_notebook_exporter_serialization() {
        let mut exporter = NotebookExporter::new();
        exporter.add_code("x = 1");
        let json = serde_json::to_string(&exporter).expect("serialize");
        let restored: NotebookExporter = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(restored.cell_count(), 1);
        assert_eq!(restored.kernel.language, "python");
    }

    #[test]
    fn test_code_cells_and_markdown_cells_filtering() {
        let mut exporter = NotebookExporter::new();
        exporter.add_code("a");
        exporter.add_markdown("b");
        exporter.add_code("c");
        exporter.add_markdown("d");
        exporter.add_code("e");

        assert_eq!(exporter.code_cells().len(), 3);
        assert_eq!(exporter.markdown_cells().len(), 2);
        assert_eq!(exporter.cell_count(), 5);
    }

    #[test]
    fn test_from_literate_typst() {
        let doc = LiterateDocument::Typst(
            "= Title\n\nSome text.\n\n```python\nprint('hi')\n```\n\nMore text.".to_string(),
        );
        let exporter = NotebookExporter::from_literate(&doc);
        assert!(exporter.cell_count() > 0);
    }
}