1use crate::cell::{Cell, CellMetadata};
7use crate::metadata::NotebookMetadata;
8use crate::notebook::Notebook;
9use crate::output::Output;
10use std::collections::HashSet;
11
/// Switches that control what [`Notebook::clean`] strips from a notebook.
///
/// `Default` turns every flag off and leaves both allow-lists unset. Note that
/// even with the defaults cell ids are dropped, because `preserve_cell_ids`
/// is `false` and ids are only kept when it (or `normalize_cell_ids`) is set.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CleanOptions {
    /// Replace the outputs of every code cell with an empty list.
    pub remove_outputs: bool,

    /// Reset the `execution_count` of every code cell to `None`.
    pub remove_execution_counts: bool,

    /// Reset the metadata of every cell to its default value.
    pub remove_cell_metadata: bool,

    /// Drop `language_info` and the extra notebook-level metadata entries.
    /// The kernelspec is governed separately by `remove_kernel_info`.
    pub remove_notebook_metadata: bool,

    /// Drop the notebook's `kernelspec`.
    pub remove_kernel_info: bool,

    /// Keep each cell's original id. Ignored when `normalize_cell_ids` is set;
    /// when both are `false` cell ids are removed.
    pub preserve_cell_ids: bool,

    /// When set (and `remove_cell_metadata` is `false`), only the cell
    /// metadata keys listed here survive. `None` keeps all cell metadata.
    pub allowed_cell_metadata_keys: Option<HashSet<String>>,

    /// When set (and `remove_notebook_metadata` is `false`), only the extra
    /// notebook metadata entries whose key is listed here survive. `None`
    /// keeps every extra entry.
    pub allowed_notebook_metadata_keys: Option<HashSet<String>>,

    /// Reset the metadata of `ExecuteResult` and `DisplayData` outputs.
    /// Has no effect when `remove_outputs` already discards the outputs.
    pub remove_output_metadata: bool,

    /// Reset the `execution_count` stored on `ExecuteResult` outputs to `None`.
    /// Has no effect when `remove_outputs` already discards the outputs.
    pub remove_output_execution_counts: bool,

    /// Replace every cell id with `cell{index}`, where `index` is the cell's
    /// zero-based position. Takes precedence over `preserve_cell_ids`.
    pub normalize_cell_ids: bool,

    /// Copied verbatim into the cleaned notebook's `sort_keys` field.
    // NOTE(review): presumably controls key ordering at serialization time —
    // confirm against the `Notebook` serializer.
    pub sort_keys: bool,
}
79
80impl CleanOptions {
81 pub fn new() -> Self {
83 Self::default()
84 }
85
86 pub fn for_vcs() -> Self {
91 Self {
92 remove_cell_metadata: true,
93 remove_execution_counts: true,
94 remove_output_metadata: true,
95 remove_output_execution_counts: true,
96 normalize_cell_ids: true,
97 sort_keys: true,
98 ..Default::default()
99 }
100 }
101
102 pub fn strip_all() -> Self {
106 Self {
107 remove_outputs: true,
108 remove_execution_counts: true,
109 remove_cell_metadata: true,
110 remove_notebook_metadata: true,
111 remove_kernel_info: true,
112 preserve_cell_ids: false,
113 allowed_cell_metadata_keys: None,
114 allowed_notebook_metadata_keys: None,
115 remove_output_metadata: true,
116 remove_output_execution_counts: true,
117 normalize_cell_ids: false,
118 sort_keys: false,
119 }
120 }
121}
122
123impl Notebook {
124 pub fn clean(&self, options: &CleanOptions) -> Notebook {
146 let cells = self
147 .cells
148 .iter()
149 .enumerate()
150 .map(|(index, cell)| clean_cell(cell, index, options))
151 .collect();
152 let metadata = clean_notebook_metadata(&self.metadata, options);
153
154 Notebook {
155 cells,
156 metadata,
157 nbformat: self.nbformat,
158 nbformat_minor: self.nbformat_minor,
159 sort_keys: options.sort_keys,
160 }
161 }
162}
163
164fn clean_cell(cell: &Cell, index: usize, options: &CleanOptions) -> Cell {
166 let compute_new_id = |original_id: &Option<String>| -> Option<String> {
168 if options.normalize_cell_ids {
169 Some(format!("cell{}", index))
170 } else if options.preserve_cell_ids {
171 original_id.clone()
172 } else {
173 None
174 }
175 };
176
177 match cell {
178 Cell::Code {
179 source,
180 execution_count,
181 outputs,
182 metadata,
183 id,
184 } => {
185 let new_execution_count = if options.remove_execution_counts {
186 None
187 } else {
188 *execution_count
189 };
190
191 let new_outputs = if options.remove_outputs {
192 Vec::new()
193 } else if options.remove_output_metadata || options.remove_output_execution_counts {
194 outputs.iter().map(|o| clean_output(o, options)).collect()
195 } else {
196 outputs.clone()
197 };
198
199 let new_metadata = clean_cell_metadata(metadata, options);
200 let new_id = compute_new_id(id);
201
202 Cell::Code {
203 source: source.clone(),
204 execution_count: new_execution_count,
205 outputs: new_outputs,
206 metadata: new_metadata,
207 id: new_id,
208 }
209 }
210 Cell::Markdown {
211 source,
212 metadata,
213 id,
214 } => {
215 let new_metadata = clean_cell_metadata(metadata, options);
216 let new_id = compute_new_id(id);
217
218 Cell::Markdown {
219 source: source.clone(),
220 metadata: new_metadata,
221 id: new_id,
222 }
223 }
224 Cell::Raw {
225 source,
226 metadata,
227 id,
228 } => {
229 let new_metadata = clean_cell_metadata(metadata, options);
230 let new_id = compute_new_id(id);
231
232 Cell::Raw {
233 source: source.clone(),
234 metadata: new_metadata,
235 id: new_id,
236 }
237 }
238 }
239}
240
241fn clean_cell_metadata(metadata: &CellMetadata, options: &CleanOptions) -> CellMetadata {
243 if options.remove_cell_metadata {
244 return CellMetadata::default();
245 }
246
247 if let Some(ref allowed_keys) = options.allowed_cell_metadata_keys {
249 let mut new_metadata = CellMetadata::default();
250
251 if allowed_keys.contains("tags") {
252 new_metadata.tags = metadata.tags.clone();
253 }
254 if allowed_keys.contains("collapsed") {
255 new_metadata.collapsed = metadata.collapsed;
256 }
257 if allowed_keys.contains("scrolled") {
258 new_metadata.scrolled = metadata.scrolled.clone();
259 }
260 if allowed_keys.contains("name") {
261 new_metadata.name = metadata.name.clone();
262 }
263
264 for (key, value) in &metadata.extra {
266 if allowed_keys.contains(key) {
267 new_metadata.extra.insert(key.clone(), value.clone());
268 }
269 }
270
271 new_metadata
272 } else {
273 metadata.clone()
274 }
275}
276
277fn clean_output(output: &Output, options: &CleanOptions) -> Output {
279 match output {
280 Output::ExecuteResult {
281 execution_count,
282 data,
283 metadata,
284 } => {
285 let new_execution_count = if options.remove_output_execution_counts {
286 None
287 } else {
288 *execution_count
289 };
290
291 let new_metadata = if options.remove_output_metadata {
292 Default::default()
293 } else {
294 metadata.clone()
295 };
296
297 Output::ExecuteResult {
298 execution_count: new_execution_count,
299 data: data.clone(),
300 metadata: new_metadata,
301 }
302 }
303 Output::DisplayData { data, metadata } => {
304 let new_metadata = if options.remove_output_metadata {
305 Default::default()
306 } else {
307 metadata.clone()
308 };
309
310 Output::DisplayData {
311 data: data.clone(),
312 metadata: new_metadata,
313 }
314 }
315 Output::Stream { .. } | Output::Error { .. } => output.clone(),
317 }
318}
319
320fn clean_notebook_metadata(
322 metadata: &NotebookMetadata,
323 options: &CleanOptions,
324) -> NotebookMetadata {
325 if options.remove_notebook_metadata && options.remove_kernel_info {
326 return NotebookMetadata::default();
327 }
328
329 let mut new_metadata = NotebookMetadata::default();
330
331 if !options.remove_kernel_info {
333 new_metadata.kernelspec = metadata.kernelspec.clone();
334 }
335
336 if !options.remove_notebook_metadata {
338 new_metadata.language_info = metadata.language_info.clone();
339
340 if let Some(ref allowed_keys) = options.allowed_notebook_metadata_keys {
342 for (key, value) in &metadata.extra {
343 if allowed_keys.contains(key) {
344 new_metadata.extra.insert(key.clone(), value.clone());
345 }
346 }
347 } else {
348 new_metadata.extra = metadata.extra.clone();
349 }
350 }
351
352 new_metadata
353}
354
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metadata::KernelSpec;
    use crate::output::{MultilineString, Output, StreamName};

    /// Fixture: an executed code cell (one stdout stream output, populated
    /// metadata, id `cell-001`), a tagged markdown cell (id `cell-002`) and a
    /// Python 3 kernelspec.
    fn create_test_notebook() -> Notebook {
        let code_cell = Cell::Code {
            source: MultilineString::from_string("print('hello')"),
            execution_count: Some(1),
            outputs: vec![Output::Stream {
                name: StreamName::Stdout,
                text: MultilineString::from_string("hello\n"),
            }],
            metadata: CellMetadata {
                tags: Some(vec![String::from("test")]),
                collapsed: Some(false),
                scrolled: None,
                name: Some(String::from("test_cell")),
                extra: Default::default(),
            },
            id: Some(String::from("cell-001")),
        };

        let markdown_cell = Cell::Markdown {
            source: MultilineString::from_string("# Hello"),
            metadata: CellMetadata {
                tags: Some(vec![String::from("doc")]),
                ..Default::default()
            },
            id: Some(String::from("cell-002")),
        };

        let mut fixture = Notebook::new();
        fixture.cells.push(code_cell);
        fixture.cells.push(markdown_cell);
        fixture.metadata.kernelspec = Some(KernelSpec {
            name: String::from("python3"),
            display_name: String::from("Python 3"),
            language: String::from("python"),
        });
        fixture
    }

    #[test]
    fn test_clean_default_options_no_change() {
        let notebook = create_test_notebook();
        let cleaned = notebook.clean(&CleanOptions::default());

        assert_eq!(cleaned.cells.len(), notebook.cells.len());

        let (before, after) = (&notebook.cells[0], &cleaned.cells[0]);
        assert_eq!(after.source_string(), before.source_string());
        assert_eq!(
            after.outputs().unwrap().len(),
            before.outputs().unwrap().len()
        );
        assert_eq!(after.execution_count(), before.execution_count());

        // Ids are dropped by default because `preserve_cell_ids` is off.
        assert!(after.id().is_none());
    }

    #[test]
    fn test_clean_remove_outputs() {
        let notebook = create_test_notebook();
        let cleaned = notebook.clean(&CleanOptions {
            remove_outputs: true,
            ..CleanOptions::default()
        });

        let (before, after) = (&notebook.cells[0], &cleaned.cells[0]);
        assert!(after.outputs().unwrap().is_empty());
        assert_eq!(after.source_string(), before.source_string());
        assert_eq!(after.execution_count(), before.execution_count());
    }

    #[test]
    fn test_clean_remove_execution_counts() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            remove_execution_counts: true,
            ..CleanOptions::default()
        });

        let first = &cleaned.cells[0];
        assert!(first.execution_count().is_none());
        assert!(!first.outputs().unwrap().is_empty());
    }

    #[test]
    fn test_clean_remove_cell_metadata() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            remove_cell_metadata: true,
            ..CleanOptions::default()
        });

        let stripped = cleaned.cells[0].metadata();
        assert!(stripped.tags.is_none());
        assert!(stripped.collapsed.is_none());
        assert!(stripped.name.is_none());
    }

    #[test]
    fn test_clean_remove_notebook_metadata() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            remove_notebook_metadata: true,
            ..CleanOptions::default()
        });

        assert!(cleaned.metadata.language_info.is_none());
        assert!(cleaned.metadata.extra.is_empty());

        // Kernel info is controlled by its own switch and must survive.
        assert!(cleaned.metadata.kernelspec.is_some());
    }

    #[test]
    fn test_clean_remove_kernel_info() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            remove_kernel_info: true,
            ..CleanOptions::default()
        });

        assert!(cleaned.metadata.kernelspec.is_none());
    }

    #[test]
    fn test_clean_preserve_cell_ids() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            preserve_cell_ids: true,
            ..CleanOptions::default()
        });

        for (position, expected) in ["cell-001", "cell-002"].iter().enumerate() {
            assert_eq!(cleaned.cells[position].id(), Some(*expected));
        }
    }

    #[test]
    fn test_clean_allowed_cell_metadata_keys() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            allowed_cell_metadata_keys: Some(vec![String::from("tags")].into_iter().collect()),
            ..CleanOptions::default()
        });

        let kept = cleaned.cells[0].metadata();
        assert!(kept.tags.is_some());
        assert!(kept.collapsed.is_none());
        assert!(kept.name.is_none());
    }

    #[test]
    fn test_clean_for_vcs() {
        let cleaned = create_test_notebook().clean(&CleanOptions::for_vcs());
        let first = &cleaned.cells[0];

        assert!(first.execution_count().is_none());
        assert!(!first.outputs().unwrap().is_empty());

        let stripped = first.metadata();
        assert!(stripped.tags.is_none());
        assert!(stripped.collapsed.is_none());
        assert!(stripped.name.is_none());

        assert!(cleaned.metadata.kernelspec.is_some());
    }

    #[test]
    fn test_clean_strip_all() {
        let cleaned = create_test_notebook().clean(&CleanOptions::strip_all());
        let first = &cleaned.cells[0];

        assert!(first.outputs().unwrap().is_empty());
        assert!(first.execution_count().is_none());
        assert!(first.id().is_none());
        assert!(first.metadata().tags.is_none());
        assert!(cleaned.metadata.kernelspec.is_none());
    }

    #[test]
    fn test_clean_original_unchanged() {
        let notebook = create_test_notebook();
        let outputs_before = notebook.cells[0].outputs().unwrap().len();
        let exec_count_before = notebook.cells[0].execution_count();

        let _cleaned = notebook.clean(&CleanOptions::strip_all());

        assert_eq!(notebook.cells[0].outputs().unwrap().len(), outputs_before);
        assert_eq!(notebook.cells[0].execution_count(), exec_count_before);
    }

    #[test]
    fn test_clean_idempotent() {
        let options = CleanOptions::strip_all();
        let first_pass = create_test_notebook().clean(&options);
        let second_pass = first_pass.clean(&options);

        assert_eq!(first_pass.cells.len(), second_pass.cells.len());
        for (once, twice) in first_pass.cells.iter().zip(second_pass.cells.iter()) {
            assert_eq!(once.source_string(), twice.source_string());
            assert_eq!(once.outputs(), twice.outputs());
            assert_eq!(once.execution_count(), twice.execution_count());
        }
    }

    #[test]
    fn test_clean_empty_notebook() {
        let cleaned = Notebook::new().clean(&CleanOptions::strip_all());

        assert!(cleaned.is_empty());
    }

    #[test]
    fn test_clean_notebook_without_outputs() {
        let mut notebook = Notebook::new();
        notebook.cells.push(Cell::code("x = 1"));

        let cleaned = notebook.clean(&CleanOptions {
            remove_outputs: true,
            ..CleanOptions::default()
        });

        assert!(cleaned.cells[0].outputs().unwrap().is_empty());
    }

    /// Fixture: a single code cell (id `cell-001`) whose outputs are an
    /// `ExecuteResult` and a `DisplayData`, both carrying `{"foo": "bar"}`
    /// metadata, followed by a stdout `Stream`.
    fn create_test_notebook_with_output_metadata() -> Notebook {
        use crate::output::{MimeBundle, MimeData, OutputMetadata};
        use indexmap::IndexMap;

        let mut output_metadata: OutputMetadata = IndexMap::new();
        output_metadata.insert("foo".to_string(), serde_json::json!("bar"));

        let mut data: MimeBundle = IndexMap::new();
        data.insert("text/plain".to_string(), MimeData::String("42".to_string()));

        let execute_result = Output::ExecuteResult {
            execution_count: Some(1),
            data: data.clone(),
            metadata: output_metadata.clone(),
        };
        let display_data = Output::DisplayData {
            data,
            metadata: output_metadata,
        };
        let stream = Output::Stream {
            name: StreamName::Stdout,
            text: MultilineString::from_string("hello\n"),
        };

        let mut fixture = Notebook::new();
        fixture.cells.push(Cell::Code {
            source: MultilineString::from_string("40 + 2"),
            execution_count: Some(1),
            outputs: vec![execute_result, display_data, stream],
            metadata: Default::default(),
            id: Some("cell-001".to_string()),
        });
        fixture
    }

    #[test]
    fn test_clean_remove_output_metadata() {
        let cleaned = create_test_notebook_with_output_metadata().clean(&CleanOptions {
            remove_output_metadata: true,
            ..CleanOptions::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();
        assert_eq!(outputs.len(), 3);

        if let Output::ExecuteResult {
            metadata,
            execution_count,
            ..
        } = &outputs[0]
        {
            assert!(
                metadata.is_empty(),
                "ExecuteResult metadata should be empty"
            );
            assert_eq!(
                *execution_count,
                Some(1),
                "execution_count should be preserved"
            );
        } else {
            panic!("Expected ExecuteResult");
        }

        if let Output::DisplayData { metadata, .. } = &outputs[1] {
            assert!(metadata.is_empty(), "DisplayData metadata should be empty");
        } else {
            panic!("Expected DisplayData");
        }

        // Stream outputs have nothing to strip and must come through intact.
        if let Output::Stream { name, text } = &outputs[2] {
            assert_eq!(*name, StreamName::Stdout);
            assert_eq!(text.as_string(), "hello\n");
        } else {
            panic!("Expected Stream");
        }
    }

    #[test]
    fn test_clean_remove_output_execution_counts() {
        let cleaned = create_test_notebook_with_output_metadata().clean(&CleanOptions {
            remove_output_execution_counts: true,
            ..CleanOptions::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();

        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(execution_count.is_none(), "execution_count should be None");
            assert!(!metadata.is_empty(), "metadata should be preserved");
        } else {
            panic!("Expected ExecuteResult");
        }

        // The cell-level counter is governed by a different switch.
        assert_eq!(cleaned.cells[0].execution_count(), Some(1));
    }

    #[test]
    fn test_clean_remove_both_output_metadata_and_execution_counts() {
        let cleaned = create_test_notebook_with_output_metadata().clean(&CleanOptions {
            remove_output_metadata: true,
            remove_output_execution_counts: true,
            ..CleanOptions::default()
        });

        let outputs = cleaned.cells[0].outputs().unwrap();

        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(execution_count.is_none(), "execution_count should be None");
            assert!(metadata.is_empty(), "metadata should be empty");
        } else {
            panic!("Expected ExecuteResult");
        }

        if let Output::DisplayData { metadata, .. } = &outputs[1] {
            assert!(metadata.is_empty(), "metadata should be empty");
        } else {
            panic!("Expected DisplayData");
        }
    }

    #[test]
    fn test_clean_for_vcs_cleans_output_metadata() {
        let cleaned =
            create_test_notebook_with_output_metadata().clean(&CleanOptions::for_vcs());

        let outputs = cleaned.cells[0].outputs().unwrap();
        assert_eq!(outputs.len(), 3, "outputs should be preserved");

        if let Output::ExecuteResult {
            execution_count,
            metadata,
            ..
        } = &outputs[0]
        {
            assert!(
                execution_count.is_none(),
                "output execution_count should be None"
            );
            assert!(metadata.is_empty(), "output metadata should be empty");
        } else {
            panic!("Expected ExecuteResult");
        }

        assert_eq!(cleaned.cells[0].id(), Some("cell0"));
    }

    #[test]
    fn test_clean_normalize_cell_ids() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            normalize_cell_ids: true,
            ..CleanOptions::default()
        });

        for (position, expected) in ["cell0", "cell1"].iter().enumerate() {
            assert_eq!(cleaned.cells[position].id(), Some(*expected));
        }
    }

    #[test]
    fn test_clean_normalize_cell_ids_without_original_ids() {
        let mut notebook = Notebook::new();
        notebook.cells.push(Cell::code("x = 1"));
        notebook.cells.push(Cell::markdown("# Header"));
        notebook.cells.push(Cell::raw("raw content"));

        let cleaned = notebook.clean(&CleanOptions {
            normalize_cell_ids: true,
            ..CleanOptions::default()
        });

        for (position, expected) in ["cell0", "cell1", "cell2"].iter().enumerate() {
            assert_eq!(cleaned.cells[position].id(), Some(*expected));
        }
    }

    #[test]
    fn test_clean_normalize_cell_ids_overrides_preserve() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            normalize_cell_ids: true,
            preserve_cell_ids: true,
            ..CleanOptions::default()
        });

        // Normalization wins even though preservation was also requested.
        for (position, expected) in ["cell0", "cell1"].iter().enumerate() {
            assert_eq!(cleaned.cells[position].id(), Some(*expected));
        }
    }

    #[test]
    fn test_clean_for_vcs_normalizes_cell_ids() {
        let cleaned = create_test_notebook().clean(&CleanOptions::for_vcs());

        for (position, expected) in ["cell0", "cell1"].iter().enumerate() {
            assert_eq!(cleaned.cells[position].id(), Some(*expected));
        }
    }

    #[test]
    fn test_clean_sort_keys() {
        let cleaned = create_test_notebook().clean(&CleanOptions {
            sort_keys: true,
            ..CleanOptions::default()
        });

        assert!(cleaned.sort_keys);
    }

    #[test]
    fn test_clean_sort_keys_default_false() {
        let cleaned = create_test_notebook().clean(&CleanOptions::default());

        assert!(!cleaned.sort_keys);
    }

    #[test]
    fn test_clean_for_vcs_enables_sort_keys() {
        let cleaned = create_test_notebook().clean(&CleanOptions::for_vcs());

        assert!(cleaned.sort_keys);
    }
}