notebookx/
clean.rs

1//! Notebook cleaning functionality.
2//!
3//! This module provides the `CleanOptions` struct and the implementation
4//! of the `clean` method for removing various types of content from notebooks.
5
6use crate::cell::{Cell, CellMetadata};
7use crate::metadata::NotebookMetadata;
8use crate::notebook::Notebook;
9use crate::output::Output;
10use std::collections::HashSet;
11
/// Options for cleaning a notebook.
///
/// Every flag defaults to `false` and both allow-lists default to `None`.
/// Note that the default is *not* a strict no-op: because `preserve_cell_ids`
/// is `false` by default, cleaning with `CleanOptions::default()` still drops
/// cell IDs. Set `preserve_cell_ids` (or `normalize_cell_ids`) to keep IDs on
/// the cleaned cells.
///
/// # Example
///
/// ```
/// use notebookx::{Notebook, Cell, CleanOptions};
///
/// let mut notebook = Notebook::new();
/// notebook.cells.push(Cell::code("print('hello')"));
///
/// let options = CleanOptions {
///     remove_outputs: true,
///     remove_execution_counts: true,
///     ..Default::default()
/// };
///
/// let cleaned = notebook.clean(&options);
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleanOptions {
    /// Remove all outputs from code cells.
    ///
    /// When set, `remove_output_metadata` and `remove_output_execution_counts`
    /// have nothing left to act on.
    pub remove_outputs: bool,

    /// Remove execution counts from code cells.
    pub remove_execution_counts: bool,

    /// Remove cell-level metadata (tags, collapsed, scrolled, name, extra).
    ///
    /// Takes precedence over `allowed_cell_metadata_keys`.
    pub remove_cell_metadata: bool,

    /// Remove notebook-level metadata (language_info, extra fields).
    /// Note: kernelspec is controlled separately by `remove_kernel_info`.
    ///
    /// Takes precedence over `allowed_notebook_metadata_keys`.
    pub remove_notebook_metadata: bool,

    /// Remove kernel specification from notebook metadata.
    pub remove_kernel_info: bool,

    /// Preserve cell IDs even when cleaning.
    /// If false (default), cell IDs are removed during cleaning.
    ///
    /// Ignored when `normalize_cell_ids` is set, since normalization replaces
    /// every ID anyway.
    pub preserve_cell_ids: bool,

    /// If set, only these metadata keys are preserved in cell metadata.
    /// The allow-list applies to the named fields (`tags`, `collapsed`,
    /// `scrolled`, `name`) as well as to keys in the `extra` map.
    /// Other keys are removed. If None, all keys are preserved
    /// (unless `remove_cell_metadata` is true).
    pub allowed_cell_metadata_keys: Option<HashSet<String>>,

    /// If set, only these keys are preserved among the *extra* notebook
    /// metadata fields; other extra keys are removed.
    ///
    /// The allow-list does not affect `language_info` or `kernelspec`: those
    /// are governed solely by `remove_notebook_metadata` and
    /// `remove_kernel_info`. If None, all extra keys are preserved
    /// (unless `remove_notebook_metadata` is true).
    pub allowed_notebook_metadata_keys: Option<HashSet<String>>,

    /// Remove metadata from outputs (ExecuteResult, DisplayData).
    pub remove_output_metadata: bool,

    /// Remove execution counts from ExecuteResult outputs.
    pub remove_output_execution_counts: bool,

    /// Normalize cell IDs to "cell{i}" format where i is the cell index.
    /// When enabled, all cells will have IDs like "cell0", "cell1", etc.,
    /// regardless of `preserve_cell_ids` and of whether the cell had an ID.
    pub normalize_cell_ids: bool,

    /// Sort JSON keys alphabetically when serializing.
    /// This is useful for VCS to produce consistent diffs.
    ///
    /// The value is copied onto the cleaned notebook's `sort_keys` field.
    pub sort_keys: bool,
}
79
80impl CleanOptions {
81    /// Create a new CleanOptions with all options disabled (no cleaning).
82    pub fn new() -> Self {
83        Self::default()
84    }
85
86    /// Create options that remove cell metadata and execution counts, and output metadata and execution counts.
87    ///
88    /// This is useful for preparing notebooks for version control.
89    /// Cell IDs are normalized to "cell{i}" format and JSON keys are sorted for consistent diffs.
90    pub fn for_vcs() -> Self {
91        Self {
92            remove_cell_metadata: true,
93            remove_execution_counts: true,
94            remove_output_metadata: true,
95            remove_output_execution_counts: true,
96            normalize_cell_ids: true,
97            sort_keys: true,
98            ..Default::default()
99        }
100    }
101
102    /// Create options that strip all metadata and outputs.
103    ///
104    /// This produces a minimal notebook with only cell content.
105    pub fn strip_all() -> Self {
106        Self {
107            remove_outputs: true,
108            remove_execution_counts: true,
109            remove_cell_metadata: true,
110            remove_notebook_metadata: true,
111            remove_kernel_info: true,
112            preserve_cell_ids: false,
113            allowed_cell_metadata_keys: None,
114            allowed_notebook_metadata_keys: None,
115            remove_output_metadata: true,
116            remove_output_execution_counts: true,
117            normalize_cell_ids: false,
118            sort_keys: false,
119        }
120    }
121}
122
123impl Notebook {
124    /// Clean the notebook according to the specified options.
125    ///
126    /// This method returns a new notebook with the requested content removed.
127    /// The original notebook is not modified.
128    ///
129    /// # Example
130    ///
131    /// ```
132    /// use notebookx::{Notebook, Cell, CleanOptions};
133    ///
134    /// let mut notebook = Notebook::new();
135    /// notebook.cells.push(Cell::code("x = 1"));
136    ///
137    /// let options = CleanOptions {
138    ///     remove_outputs: true,
139    ///     ..Default::default()
140    /// };
141    ///
142    /// let cleaned = notebook.clean(&options);
143    /// assert_eq!(notebook.len(), cleaned.len()); // Original unchanged
144    /// ```
145    pub fn clean(&self, options: &CleanOptions) -> Notebook {
146        let cells = self
147            .cells
148            .iter()
149            .enumerate()
150            .map(|(index, cell)| clean_cell(cell, index, options))
151            .collect();
152        let metadata = clean_notebook_metadata(&self.metadata, options);
153
154        Notebook {
155            cells,
156            metadata,
157            nbformat: self.nbformat,
158            nbformat_minor: self.nbformat_minor,
159            sort_keys: options.sort_keys,
160        }
161    }
162}
163
164/// Clean a single cell according to the options.
165fn clean_cell(cell: &Cell, index: usize, options: &CleanOptions) -> Cell {
166    // Determine the new cell ID based on options
167    let compute_new_id = |original_id: &Option<String>| -> Option<String> {
168        if options.normalize_cell_ids {
169            Some(format!("cell{}", index))
170        } else if options.preserve_cell_ids {
171            original_id.clone()
172        } else {
173            None
174        }
175    };
176
177    match cell {
178        Cell::Code {
179            source,
180            execution_count,
181            outputs,
182            metadata,
183            id,
184        } => {
185            let new_execution_count = if options.remove_execution_counts {
186                None
187            } else {
188                *execution_count
189            };
190
191            let new_outputs = if options.remove_outputs {
192                Vec::new()
193            } else if options.remove_output_metadata || options.remove_output_execution_counts {
194                outputs.iter().map(|o| clean_output(o, options)).collect()
195            } else {
196                outputs.clone()
197            };
198
199            let new_metadata = clean_cell_metadata(metadata, options);
200            let new_id = compute_new_id(id);
201
202            Cell::Code {
203                source: source.clone(),
204                execution_count: new_execution_count,
205                outputs: new_outputs,
206                metadata: new_metadata,
207                id: new_id,
208            }
209        }
210        Cell::Markdown {
211            source,
212            metadata,
213            id,
214        } => {
215            let new_metadata = clean_cell_metadata(metadata, options);
216            let new_id = compute_new_id(id);
217
218            Cell::Markdown {
219                source: source.clone(),
220                metadata: new_metadata,
221                id: new_id,
222            }
223        }
224        Cell::Raw {
225            source,
226            metadata,
227            id,
228        } => {
229            let new_metadata = clean_cell_metadata(metadata, options);
230            let new_id = compute_new_id(id);
231
232            Cell::Raw {
233                source: source.clone(),
234                metadata: new_metadata,
235                id: new_id,
236            }
237        }
238    }
239}
240
241/// Clean cell metadata according to the options.
242fn clean_cell_metadata(metadata: &CellMetadata, options: &CleanOptions) -> CellMetadata {
243    if options.remove_cell_metadata {
244        return CellMetadata::default();
245    }
246
247    // If allowed_cell_metadata_keys is set, filter the metadata
248    if let Some(ref allowed_keys) = options.allowed_cell_metadata_keys {
249        let mut new_metadata = CellMetadata::default();
250
251        if allowed_keys.contains("tags") {
252            new_metadata.tags = metadata.tags.clone();
253        }
254        if allowed_keys.contains("collapsed") {
255            new_metadata.collapsed = metadata.collapsed;
256        }
257        if allowed_keys.contains("scrolled") {
258            new_metadata.scrolled = metadata.scrolled.clone();
259        }
260        if allowed_keys.contains("name") {
261            new_metadata.name = metadata.name.clone();
262        }
263
264        // Filter extra fields
265        for (key, value) in &metadata.extra {
266            if allowed_keys.contains(key) {
267                new_metadata.extra.insert(key.clone(), value.clone());
268            }
269        }
270
271        new_metadata
272    } else {
273        metadata.clone()
274    }
275}
276
277/// Clean a single output according to the options.
278fn clean_output(output: &Output, options: &CleanOptions) -> Output {
279    match output {
280        Output::ExecuteResult {
281            execution_count,
282            data,
283            metadata,
284        } => {
285            let new_execution_count = if options.remove_output_execution_counts {
286                None
287            } else {
288                *execution_count
289            };
290
291            let new_metadata = if options.remove_output_metadata {
292                Default::default()
293            } else {
294                metadata.clone()
295            };
296
297            Output::ExecuteResult {
298                execution_count: new_execution_count,
299                data: data.clone(),
300                metadata: new_metadata,
301            }
302        }
303        Output::DisplayData { data, metadata } => {
304            let new_metadata = if options.remove_output_metadata {
305                Default::default()
306            } else {
307                metadata.clone()
308            };
309
310            Output::DisplayData {
311                data: data.clone(),
312                metadata: new_metadata,
313            }
314        }
315        // Stream and Error outputs don't have metadata or execution_count
316        Output::Stream { .. } | Output::Error { .. } => output.clone(),
317    }
318}
319
320/// Clean notebook metadata according to the options.
321fn clean_notebook_metadata(
322    metadata: &NotebookMetadata,
323    options: &CleanOptions,
324) -> NotebookMetadata {
325    if options.remove_notebook_metadata && options.remove_kernel_info {
326        return NotebookMetadata::default();
327    }
328
329    let mut new_metadata = NotebookMetadata::default();
330
331    // Handle kernelspec
332    if !options.remove_kernel_info {
333        new_metadata.kernelspec = metadata.kernelspec.clone();
334    }
335
336    // Handle other metadata (language_info, extra)
337    if !options.remove_notebook_metadata {
338        new_metadata.language_info = metadata.language_info.clone();
339
340        // If allowed_notebook_metadata_keys is set, filter extra fields
341        if let Some(ref allowed_keys) = options.allowed_notebook_metadata_keys {
342            for (key, value) in &metadata.extra {
343                if allowed_keys.contains(key) {
344                    new_metadata.extra.insert(key.clone(), value.clone());
345                }
346            }
347        } else {
348            new_metadata.extra = metadata.extra.clone();
349        }
350    }
351
352    new_metadata
353}
354
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metadata::KernelSpec;
    use crate::output::{MultilineString, Output, StreamName};

    /// Fixture: an executed code cell (one stdout stream output, populated
    /// metadata, ID "cell-001") followed by a tagged markdown cell (ID
    /// "cell-002"), with a Python 3 kernelspec on the notebook.
    fn create_test_notebook() -> Notebook {
        let code_cell = Cell::Code {
            source: MultilineString::from_string("print('hello')"),
            execution_count: Some(1),
            outputs: vec![Output::Stream {
                name: StreamName::Stdout,
                text: MultilineString::from_string("hello\n"),
            }],
            metadata: CellMetadata {
                tags: Some(vec!["test".to_string()]),
                collapsed: Some(false),
                scrolled: None,
                name: Some("test_cell".to_string()),
                extra: Default::default(),
            },
            id: Some("cell-001".to_string()),
        };

        let markdown_cell = Cell::Markdown {
            source: MultilineString::from_string("# Hello"),
            metadata: CellMetadata {
                tags: Some(vec!["doc".to_string()]),
                ..Default::default()
            },
            id: Some("cell-002".to_string()),
        };

        let mut notebook = Notebook::new();
        notebook.cells.push(code_cell);
        notebook.cells.push(markdown_cell);
        notebook.metadata.kernelspec = Some(KernelSpec {
            name: "python3".to_string(),
            display_name: "Python 3".to_string(),
            language: "python".to_string(),
        });
        notebook
    }

    #[test]
    fn test_clean_default_options_no_change() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::default());

        // Same number of cells before and after.
        assert_eq!(cleaned.cells.len(), original.cells.len());

        let before = &original.cells[0];
        let after = &cleaned.cells[0];

        // Source, outputs and execution count all survive default cleaning.
        assert_eq!(after.source_string(), before.source_string());
        assert_eq!(
            after.outputs().unwrap().len(),
            before.outputs().unwrap().len()
        );
        assert_eq!(after.execution_count(), before.execution_count());

        // The one thing default options do remove is the cell ID.
        assert!(after.id().is_none());
    }

    #[test]
    fn test_clean_remove_outputs() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            remove_outputs: true,
            ..Default::default()
        });

        let before = &original.cells[0];
        let after = &cleaned.cells[0];

        // The outputs are gone...
        assert!(after.outputs().unwrap().is_empty());

        // ...while source and execution count are untouched.
        assert_eq!(after.source_string(), before.source_string());
        assert_eq!(after.execution_count(), before.execution_count());
    }

    #[test]
    fn test_clean_remove_execution_counts() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            remove_execution_counts: true,
            ..Default::default()
        });

        let first = &cleaned.cells[0];

        // The count is cleared but the outputs stay.
        assert!(first.execution_count().is_none());
        assert!(!first.outputs().unwrap().is_empty());
    }

    #[test]
    fn test_clean_remove_cell_metadata() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            remove_cell_metadata: true,
            ..Default::default()
        });

        // Every populated metadata field must have been reset.
        let cell_meta = cleaned.cells[0].metadata();
        assert!(cell_meta.tags.is_none());
        assert!(cell_meta.collapsed.is_none());
        assert!(cell_meta.name.is_none());
    }

    #[test]
    fn test_clean_remove_notebook_metadata() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            remove_notebook_metadata: true,
            ..Default::default()
        });

        // language_info and extra are dropped...
        assert!(cleaned.metadata.language_info.is_none());
        assert!(cleaned.metadata.extra.is_empty());

        // ...but kernelspec has its own switch and stays.
        assert!(cleaned.metadata.kernelspec.is_some());
    }

    #[test]
    fn test_clean_remove_kernel_info() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            remove_kernel_info: true,
            ..Default::default()
        });

        // The kernelspec must be gone.
        assert!(cleaned.metadata.kernelspec.is_none());
    }

    #[test]
    fn test_clean_preserve_cell_ids() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            preserve_cell_ids: true,
            ..Default::default()
        });

        // Both cells keep the IDs they were created with.
        assert_eq!(cleaned.cells[0].id(), Some("cell-001"));
        assert_eq!(cleaned.cells[1].id(), Some("cell-002"));
    }

    #[test]
    fn test_clean_allowed_cell_metadata_keys() {
        let original = create_test_notebook();
        let only_tags: HashSet<String> = vec!["tags".to_string()].into_iter().collect();

        let cleaned = original.clean(&CleanOptions {
            allowed_cell_metadata_keys: Some(only_tags),
            ..Default::default()
        });

        // "tags" is allow-listed; "collapsed" and "name" are not.
        let cell_meta = cleaned.cells[0].metadata();
        assert!(cell_meta.tags.is_some());
        assert!(cell_meta.collapsed.is_none());
        assert!(cell_meta.name.is_none());
    }

    #[test]
    fn test_clean_for_vcs() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::for_vcs());

        let first = &cleaned.cells[0];

        // Execution counts are stripped.
        assert!(first.execution_count().is_none());

        // The VCS preset keeps outputs.
        assert!(!first.outputs().unwrap().is_empty());

        // Cell metadata is stripped.
        let cell_meta = first.metadata();
        assert!(cell_meta.tags.is_none());
        assert!(cell_meta.collapsed.is_none());
        assert!(cell_meta.name.is_none());

        // Notebook-level kernelspec is kept.
        assert!(cleaned.metadata.kernelspec.is_some());
    }

    #[test]
    fn test_clean_strip_all() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::strip_all());

        // Only the content is left behind.
        let first = &cleaned.cells[0];
        assert!(first.outputs().unwrap().is_empty());
        assert!(first.execution_count().is_none());
        assert!(first.id().is_none());
        assert!(first.metadata().tags.is_none());
        assert!(cleaned.metadata.kernelspec.is_none());
    }

    #[test]
    fn test_clean_original_unchanged() {
        let original = create_test_notebook();
        let outputs_before = original.cells[0].outputs().unwrap().len();
        let exec_count_before = original.cells[0].execution_count();

        let _cleaned = original.clean(&CleanOptions::strip_all());

        // Even the most aggressive preset must not touch the source notebook.
        assert_eq!(original.cells[0].outputs().unwrap().len(), outputs_before);
        assert_eq!(original.cells[0].execution_count(), exec_count_before);
    }

    #[test]
    fn test_clean_idempotent() {
        let options = CleanOptions::strip_all();
        let first_pass = create_test_notebook().clean(&options);
        let second_pass = first_pass.clean(&options);

        // A second pass must not change anything the first pass produced.
        assert_eq!(first_pass.cells.len(), second_pass.cells.len());
        for (once, twice) in first_pass.cells.iter().zip(second_pass.cells.iter()) {
            assert_eq!(once.source_string(), twice.source_string());
            assert_eq!(once.outputs(), twice.outputs());
            assert_eq!(once.execution_count(), twice.execution_count());
        }
    }

    #[test]
    fn test_clean_empty_notebook() {
        let cleaned = Notebook::new().clean(&CleanOptions::strip_all());

        assert!(cleaned.is_empty());
    }

    #[test]
    fn test_clean_notebook_without_outputs() {
        let mut original = Notebook::new();
        original.cells.push(Cell::code("x = 1"));

        let cleaned = original.clean(&CleanOptions {
            remove_outputs: true,
            ..Default::default()
        });

        // Removing outputs from a cell that has none is still fine.
        assert!(cleaned.cells[0].outputs().unwrap().is_empty());
    }

    /// Fixture: a single code cell (ID "cell-001") whose outputs are, in order,
    /// an `ExecuteResult` and a `DisplayData` that both carry `{"foo": "bar"}`
    /// metadata, followed by a stdout `Stream`.
    fn create_test_notebook_with_output_metadata() -> Notebook {
        use crate::output::{MimeBundle, MimeData, OutputMetadata};
        use indexmap::IndexMap;

        // Metadata shared by the two rich outputs.
        let mut shared_metadata: OutputMetadata = IndexMap::new();
        shared_metadata.insert("foo".to_string(), serde_json::json!("bar"));

        // MIME payload shared by the two rich outputs.
        let mut bundle: MimeBundle = IndexMap::new();
        bundle.insert("text/plain".to_string(), MimeData::String("42".to_string()));

        let execute_result = Output::ExecuteResult {
            execution_count: Some(1),
            data: bundle.clone(),
            metadata: shared_metadata.clone(),
        };
        let display_data = Output::DisplayData {
            data: bundle.clone(),
            metadata: shared_metadata.clone(),
        };
        let stream = Output::Stream {
            name: StreamName::Stdout,
            text: MultilineString::from_string("hello\n"),
        };

        let mut notebook = Notebook::new();
        notebook.cells.push(Cell::Code {
            source: MultilineString::from_string("40 + 2"),
            execution_count: Some(1),
            outputs: vec![execute_result, display_data, stream],
            metadata: Default::default(),
            id: Some("cell-001".to_string()),
        });
        notebook
    }

    #[test]
    fn test_clean_remove_output_metadata() {
        let original = create_test_notebook_with_output_metadata();
        let cleaned = original.clean(&CleanOptions {
            remove_output_metadata: true,
            ..Default::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();
        assert_eq!(outputs.len(), 3);

        // ExecuteResult: metadata emptied, execution count left alone.
        if let Output::ExecuteResult {
            metadata,
            execution_count,
            ..
        } = &outputs[0]
        {
            assert!(
                metadata.is_empty(),
                "ExecuteResult metadata should be empty"
            );
            assert_eq!(
                *execution_count,
                Some(1),
                "execution_count should be preserved"
            );
        } else {
            panic!("Expected ExecuteResult");
        }

        // DisplayData: metadata emptied.
        if let Output::DisplayData { metadata, .. } = &outputs[1] {
            assert!(metadata.is_empty(), "DisplayData metadata should be empty");
        } else {
            panic!("Expected DisplayData");
        }

        // Stream: has no metadata, so it must come through unchanged.
        if let Output::Stream { name, text } = &outputs[2] {
            assert_eq!(*name, StreamName::Stdout);
            assert_eq!(text.as_string(), "hello\n");
        } else {
            panic!("Expected Stream");
        }
    }

    #[test]
    fn test_clean_remove_output_execution_counts() {
        let original = create_test_notebook_with_output_metadata();
        let cleaned = original.clean(&CleanOptions {
            remove_output_execution_counts: true,
            ..Default::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();

        // ExecuteResult: count cleared, metadata left alone.
        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(execution_count.is_none(), "execution_count should be None");
            assert!(!metadata.is_empty(), "metadata should be preserved");
        } else {
            panic!("Expected ExecuteResult");
        }

        // The cell-level count is a different option and must survive.
        assert_eq!(cleaned.cells[0].execution_count(), Some(1));
    }

    #[test]
    fn test_clean_remove_both_output_metadata_and_execution_counts() {
        let original = create_test_notebook_with_output_metadata();
        let cleaned = original.clean(&CleanOptions {
            remove_output_metadata: true,
            remove_output_execution_counts: true,
            ..Default::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();

        // ExecuteResult: both the count and the metadata are cleared.
        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(execution_count.is_none(), "execution_count should be None");
            assert!(metadata.is_empty(), "metadata should be empty");
        } else {
            panic!("Expected ExecuteResult");
        }

        // DisplayData: metadata cleared.
        if let Output::DisplayData { metadata, .. } = &outputs[1] {
            assert!(metadata.is_empty(), "metadata should be empty");
        } else {
            panic!("Expected DisplayData");
        }
    }

    #[test]
    fn test_clean_for_vcs_cleans_output_metadata() {
        let original = create_test_notebook_with_output_metadata();
        let cleaned = original.clean(&CleanOptions::for_vcs());

        // The VCS preset keeps the outputs but scrubs their count and metadata.
        let outputs = cleaned.cells[0].outputs().unwrap();
        assert_eq!(outputs.len(), 3, "outputs should be preserved");

        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(
                execution_count.is_none(),
                "output execution_count should be None"
            );
            assert!(metadata.is_empty(), "output metadata should be empty");
        } else {
            panic!("Expected ExecuteResult");
        }

        // The preset also rewrites the cell ID.
        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
    }

    #[test]
    fn test_clean_normalize_cell_ids() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            normalize_cell_ids: true,
            ..Default::default()
        });

        // IDs follow the "cell{i}" scheme, indexed from zero.
        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
        assert_eq!(cleaned.cells[1].id(), Some("cell1"));
    }

    #[test]
    fn test_clean_normalize_cell_ids_without_original_ids() {
        // These constructors are used here to get cells that start without IDs.
        let mut original = Notebook::new();
        original.cells.push(Cell::code("x = 1"));
        original.cells.push(Cell::markdown("# Header"));
        original.cells.push(Cell::raw("raw content"));

        let cleaned = original.clean(&CleanOptions {
            normalize_cell_ids: true,
            ..Default::default()
        });

        // Normalization assigns IDs even where there were none before.
        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
        assert_eq!(cleaned.cells[1].id(), Some("cell1"));
        assert_eq!(cleaned.cells[2].id(), Some("cell2"));
    }

    #[test]
    fn test_clean_normalize_cell_ids_overrides_preserve() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            normalize_cell_ids: true,
            // Expected to be ignored because normalization is requested too.
            preserve_cell_ids: true,
            ..Default::default()
        });

        // Normalization wins over preservation.
        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
        assert_eq!(cleaned.cells[1].id(), Some("cell1"));
    }

    #[test]
    fn test_clean_for_vcs_normalizes_cell_ids() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::for_vcs());

        // The VCS preset implies ID normalization.
        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
        assert_eq!(cleaned.cells[1].id(), Some("cell1"));
    }

    #[test]
    fn test_clean_sort_keys() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions {
            sort_keys: true,
            ..Default::default()
        });

        // The flag is carried over onto the cleaned notebook.
        assert!(cleaned.sort_keys);
    }

    #[test]
    fn test_clean_sort_keys_default_false() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::default());

        // Default options leave key sorting off.
        assert!(!cleaned.sort_keys);
    }

    #[test]
    fn test_clean_for_vcs_enables_sort_keys() {
        let original = create_test_notebook();
        let cleaned = original.clean(&CleanOptions::for_vcs());

        // The VCS preset switches key sorting on.
        assert!(cleaned.sort_keys);
    }
}