nbwipers 0.6.1

Wipe clean your Jupyter Notebooks!
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
use std::path::PathBuf;

use clap::{
    builder::{styling::AnsiColor, Styles},
    command, Parser, Subcommand, ValueEnum,
};

use crate::{
    config::{Configuration, FilePattern, IdAction},
    extra_keys::ExtraKey,
};
/// Colour scheme applied to clap's generated help output (wired in through
/// `styles=STYLES` on [`Cli`]): section headers are yellow, while usage lines,
/// literals and placeholders are green.
const STYLES: Styles = Styles::styled()
    .header(AnsiColor::Yellow.on_default())
    .usage(AnsiColor::Green.on_default())
    .literal(AnsiColor::Green.on_default())
    .placeholder(AnsiColor::Green.on_default());

// Top-level command-line interface: one hidden flag plus the subcommand to run.
// Plain `//` comments are used on purpose: clap's derive macros turn `///` doc
// comments into help text, so adding them would change the program's output.
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, styles=STYLES)]
pub struct Cli {
    // Hidden from `--help` (`hide = true`). Presumably makes the binary print its
    // help as Markdown — confirm where the flag is consumed.
    #[arg(long, hide = true)]
    pub markdown_help: bool,
    // The selected subcommand together with its own arguments.
    #[command(subcommand)]
    pub command: Commands,
}
// Options shared by the notebook-processing subcommands; each of those commands
// embeds this struct via `flatten`. `CommonArgs::partition` later splits it into
// `Args` and `ConfigOverrides`.
//
// Plain `//` comments are used for maintainer notes because clap turns `///`
// doc comments into user-visible help text.
//
// Most boolean settings come as a visible flag plus a hidden opposite flag that
// override each other (`overrides_with`), so the last one on the command line
// wins and at most one of each pair is ever `true` after parsing.
#[allow(clippy::struct_excessive_bools)]
#[derive(Parser, Debug, Clone)]
pub struct CommonArgs {
    /// path to pyproject.toml/.nbwipers.toml/nbwipers.toml file containing nbwipers settings. If not given use the file in the current working directory or the first such file in its containing folders.
    #[arg(long, short)]
    pub config: Option<PathBuf>,
    /// Ignore all configuration files.
    #[arg(long, conflicts_with = "config")]
    pub isolated: bool,

    /// Do not return an error if no notebooks are found
    #[arg(long)]
    pub allow_no_notebooks: bool,

    /// extra keys to remove in the notebook or cell metadata, separated by commas. Must start with `metadata` or `cell.metadata`
    #[arg(long, value_delimiter = ',')]
    pub extra_keys: Option<Vec<ExtraKey>>,

    /// drop empty cells. Disable with `--keep-empty-cells`
    #[arg(long, overrides_with("keep_empty_cells"))]
    pub drop_empty_cells: bool,

    // Hidden negation of `--drop-empty-cells`.
    #[arg(long, overrides_with("drop_empty_cells"), hide = true)]
    pub keep_empty_cells: bool,

    /// keep cell output. Disable with `--drop-output`
    #[arg(long, overrides_with("drop_output"))]
    pub keep_output: bool,

    // Hidden negation of `--keep-output`.
    #[arg(long, overrides_with("keep_output"), hide = true)]
    pub drop_output: bool,

    /// keep cell execution count. Disable with `--drop-count`
    #[arg(long, overrides_with("drop_count"))]
    pub keep_count: bool,

    // Hidden negation of `--keep-count`.
    #[arg(long, overrides_with("keep_count"), hide = true)]
    pub drop_count: bool,

    // NOTE(review): the help texts of the four id options below say "Conflicts
    // with", but the attributes are `overrides_with`, i.e. a later option
    // silently replaces an earlier one instead of producing an error.
    /// remove cell ids and downgrade to nbformat 4.4. Conflicts with `--keep-id` and `--sequential-id`. Equivalent to `--id-action=drop`
    #[arg(
        long,
        overrides_with("keep_id"),
        overrides_with("sequential_id"),
        overrides_with("id_action")
    )]
    pub drop_id: bool,

    /// keep cell ids (default). Conflicts with `--sequential-id` and `--drop-id`. Equivalent to `--id-action=keep`
    #[arg(
        long,
        overrides_with("drop_id"),
        overrides_with("sequential_id"),
        overrides_with("id_action")
    )]
    pub keep_id: bool,

    /// replace cell ids with sequential ids. Conflicts with `--keep-id` and `--drop-id`. Equivalent to `--id-action=sequential`
    #[arg(
        long,
        overrides_with("drop_id"),
        overrides_with("keep_id"),
        overrides_with("id_action")
    )]
    pub sequential_id: bool,

    /// Specify what action to take on cell ids. `drop` to remove, `sequential` to replace with sequential ids and `keep` to do nothing. Equivalent to `--drop-id`, `--sequential-id` and `--keep-id` respectively.
    #[arg(
        long,
        overrides_with("drop_id"),
        overrides_with("keep_id"),
        overrides_with("sequential_id")
    )]
    pub id_action: Option<IdAction>,

    /// Strip init cell. Disable with `--keep-init-cell`
    #[arg(long, overrides_with("keep_init_cell"))]
    pub strip_init_cell: bool,

    // Hidden negation of `--strip-init-cell`.
    #[arg(long, overrides_with("strip_init_cell"), hide = true)]
    pub keep_init_cell: bool,
    /// Strip kernel info. Namely, metadata.kernelspec and metadata.language_info.python_version. Disable with `--keep-kernel-info`
    #[arg(long, overrides_with("keep_kernel_info"))]
    pub strip_kernel_info: bool,

    // Hidden negation of `--strip-kernel-info`.
    #[arg(long, overrides_with("strip_kernel_info"), hide = true)]
    pub keep_kernel_info: bool,

    /// comma-separated list of tags that will cause the cell to be dropped
    #[arg(long, value_delimiter = ',')]
    pub drop_tagged_cells: Option<Vec<String>>,

    // NOTE(review): the help sentence below is cut off after "appear in"; finish
    // it once the intended wording is confirmed.
    /// List of metadata keys that should be kept, regardless of if they appear in
    #[arg(long, value_delimiter = ',')]
    pub keep_keys: Option<Vec<ExtraKey>>,
    /// List of file patterns to ignore
    #[arg(long, value_delimiter = ',')]
    pub exclude: Option<Vec<FilePattern>>,
    /// List of additional file patterns to ignore
    #[arg(long, value_delimiter = ',')]
    pub extend_exclude: Option<Vec<FilePattern>>,
}

// All top-level subcommands of the binary. Each variant wraps the argument
// struct for that subcommand; the `///` line above a variant is the text clap
// shows for it in `--help`, so those lines are user-facing and left untouched.
#[derive(Subcommand, Debug, Clone)]
pub enum Commands {
    /// Register nbwipers as a git filter for `ipynb` files
    Install(InstallCommand),
    /// clean all notebooks in a given path
    CleanAll(CleanAllCommand),
    /// check notebooks in a given path for elements that would be removed by `clean`
    Check(CheckCommand),
    /// clean a single notebook
    Clean(CleanCommand),
    /// uninstall nbwipers as a git filter
    Uninstall(UninstallCommand),
    /// check whether nbwipers is setup as a git filter
    CheckInstall(CheckInstallCommand),
    /// Show configuration
    ShowConfig(ShowConfigCommand),
    /// Record Kernelspec metadata for notebooks
    Record(RecordCommand),
    /// Add back kernelspec metadata to the notebook as a smudge
    // Hidden from the help listing.
    #[clap(hide(true))]
    Smudge(SmudgeCommand),
    /// Commands for pre-commit hooks
    // Nested subcommand group: `hook <subcommand>`.
    #[command(subcommand)]
    Hook(HookCommands),
}

// Subcommands nested under `Commands::Hook`. Only one exists at present.
#[derive(Subcommand, Debug, Clone)]
pub enum HookCommands {
    /// Check for large files, but measure ipynb sizes after cleaning
    CheckLargeFiles(CheckLargeFilesCommand),
}

// Arguments of the `hook check-large-files` subcommand. It carries its own
// `config` / `isolated` options rather than flattening `CommonArgs`, so none of
// the cleaning overrides are accepted here.
#[derive(Clone, Debug, Parser)]
pub struct CheckLargeFilesCommand {
    /// Files to check for large files.
    // Positional; zero or more paths.
    pub filenames: Vec<PathBuf>,
    /// Check all files not just staged files
    #[arg(long, action)]
    pub enforce_all: bool,
    /// Max size in KB to consider a file large
    // The long name is spelled out explicitly as `--maxkb`. `None` means the
    // flag was not given; the fallback limit is decided elsewhere.
    #[arg(long("maxkb"))]
    pub maxkb: Option<u64>,
    /// path to pyproject.toml/.nbwipers.toml/nbwipers.toml file containing nbwipers settings. If not given use the file in the current working directory or the first such file in its containing folders.
    #[arg(long, short)]
    pub config: Option<PathBuf>,

    /// Ignore all configuration files.
    #[arg(long, conflicts_with = "config")]
    pub isolated: bool,
}

// Arguments of the `show-config` subcommand: a show-all switch (with a hidden
// negation) on top of the shared `CommonArgs`.
// Plain `//` comments are used for maintainer notes because clap turns `///`
// doc comments into user-visible help text.
#[derive(Clone, Debug, Parser)]
pub struct ShowConfigCommand {
    /// Show all config including defaults. Disable with `--no-show-defaults`
    #[arg(long, overrides_with("no_show_defaults"))]
    pub show_all: bool,

    // Hidden negation of `--show-all`; whichever of the two comes last wins.
    #[arg(long, overrides_with("show_all"), hide = true)]
    pub no_show_defaults: bool,
    #[clap(flatten)]
    pub common: CommonArgs,
}
// Arguments of the `check` subcommand: the paths to inspect, how to print the
// diagnostics, and the shared `CommonArgs`.
#[derive(Clone, Debug, Parser)]
pub struct CheckCommand {
    /// paths containing ipynb files to check. Use `-` to read from stdin
    pub files: Vec<PathBuf>,

    /// desired output format for diagnostics
    // `None` when the flag is omitted; the fallback is chosen by the consumer
    // (`OutputFormat` derives `Default`, with `Text` marked as the default).
    #[arg(long, short)]
    pub output_format: Option<OutputFormat>,

    /// Name of file if stdin is used
    #[arg(long)]
    pub stdin_file_name: Option<PathBuf>,

    #[clap(flatten)]
    pub common: CommonArgs,
}
// Arguments of the `clean` subcommand, which operates on exactly one notebook
// (`file` is a required positional) plus the shared `CommonArgs`.
#[derive(Clone, Debug, Parser)]
pub struct CleanCommand {
    /// path to ipynb file to clean. Use `-` to read from stdin and write to stdout
    pub file: PathBuf,

    /// write cleaned file to stdout instead of to the file
    #[arg(long, short)]
    pub textconv: bool,

    /// Name of file if stdin is used
    #[arg(long)]
    pub stdin_file_name: Option<PathBuf>,

    /// If true, and the file is excluded, do not execute clean
    #[arg(long)]
    pub respect_exclusions: bool,

    #[clap(flatten)]
    pub common: CommonArgs,
}
// Arguments of the `clean-all` subcommand: the paths to process, two switches
// controlling writing and confirmation, and the shared `CommonArgs`.
#[derive(Clone, Debug, Parser)]
pub struct CleanAllCommand {
    /// paths containing ipynb files to clean. Stdin is not supported.
    pub files: Vec<PathBuf>,

    /// set to true to avoid writing to files
    #[arg(long, short)]
    pub dry_run: bool,

    /// skip confirmation and assume yes
    #[arg(long, short)]
    pub yes: bool,

    #[clap(flatten)]
    pub common: CommonArgs,
}

// Formats accepted by `check --output-format`. `Text` is the `Default` variant.
#[derive(Clone, Debug, ValueEnum, Copy, Default)]
pub enum OutputFormat {
    #[default]
    Text,
    Json,
}
// Arguments of the `install` subcommand. `config_type` is a required positional
// value; the two file paths are optional overrides.
#[derive(Clone, Debug, Parser)]
pub struct InstallCommand {
    /// Git config type that determines which file to modify
    #[clap(value_enum)]
    pub config_type: GitConfigType,

    /// Optional path to git config file
    #[arg(long, short)]
    pub git_config_file: Option<PathBuf>,

    /// optional attribute file. If not specified, will write to .git/info/attributes
    #[arg(long, short)]
    pub attribute_file: Option<PathBuf>,
}
// Arguments of the `uninstall` subcommand. Field-for-field the same shape as
// `InstallCommand`: a required positional `config_type` and two optional paths.
#[derive(Clone, Debug, Parser)]
pub struct UninstallCommand {
    /// Git config type that determines which file to modify
    #[clap(value_enum)]
    pub config_type: GitConfigType,

    /// Optional path to git config file
    #[arg(long, short)]
    pub git_config_file: Option<PathBuf>,

    /// optional attribute file. If not specified, will write to .git/info/attributes
    #[arg(long, short)]
    pub attribute_file: Option<PathBuf>,
}

// Arguments of the `record` subcommand (listed in `Commands` as "Record
// Kernelspec metadata for notebooks").
// NOTE(review): `path`, `remove`, `clear` and `sync` carry no `///` help text,
// so they appear undocumented in `--help`; their exact semantics are not
// visible in this file and should be documented by someone who knows them.
#[derive(Clone, Debug, Parser)]
pub struct RecordCommand {
    // Optional positional path.
    pub path: Option<PathBuf>,

    // `--remove <PATH>`; collected into a list.
    #[arg(long)]
    pub remove: Vec<PathBuf>,

    #[arg(long)]
    pub clear: bool,
    #[arg(long)]
    pub sync: bool,

    #[clap(flatten)]
    pub common: CommonArgs,
}
// Arguments of the `check-install` subcommand.
#[derive(Clone, Debug, Parser)]
pub struct CheckInstallCommand {
    /// Exit zero regardless of install status
    #[arg(long)]
    pub exit_zero: bool,
    /// Git config type to check
    // Optional positional; what happens when it is omitted is decided by the
    // consumer of this struct, not here.
    #[clap(value_enum)]
    pub config_type: Option<GitConfigType>,
}

// Arguments of the hidden `smudge` subcommand: a single required positional.
#[derive(Clone, Debug, Parser)]
pub struct SmudgeCommand {
    // Kept as a plain `String` rather than a `PathBuf`, unlike the path
    // arguments of the other commands.
    pub path: String,
}

// Which git configuration scope a command targets. Used as a positional value
// by `install`, `uninstall` and `check-install`.
#[derive(Clone, Debug, ValueEnum, Copy)]
pub enum GitConfigType {
    /// System-wide git config
    System,
    /// User level git config, typically corresponding to ~/.gitconfig
    Global,
    /// Repository level git config, corresponding to .git/config
    Local,
}

/// Configuration values supplied on the command line.
///
/// Produced by `CommonArgs::partition`. Every field is an `Option`: `None`
/// means the command line did not mention the setting, in which case
/// `ConfigOverrides::override_config` leaves the corresponding configuration
/// value as it was.
#[derive(Clone, Debug, Default)]
pub struct ConfigOverrides {
    /// From `--strip-kernel-info` (`Some(true)`) / `--keep-kernel-info` (`Some(false)`).
    pub strip_kernel_info: Option<bool>,
    /// From `--extra-keys`.
    pub extra_keys: Option<Vec<ExtraKey>>,
    /// From `--drop-empty-cells` (`Some(true)`) / `--keep-empty-cells` (`Some(false)`).
    pub drop_empty_cells: Option<bool>,
    /// From `--drop-output` (`Some(true)`) / `--keep-output` (`Some(false)`).
    pub drop_output: Option<bool>,
    /// From `--drop-count` (`Some(true)`) / `--keep-count` (`Some(false)`).
    pub drop_count: Option<bool>,
    /// From `--id-action`, or one of `--keep-id` / `--sequential-id` / `--drop-id`.
    pub id_action: Option<IdAction>,
    /// From `--strip-init-cell` (`Some(true)`) / `--keep-init-cell` (`Some(false)`).
    pub strip_init_cell: Option<bool>,
    /// From `--drop-tagged-cells`.
    pub drop_tagged_cells: Option<Vec<String>>,
    /// From `--keep-keys`.
    pub keep_keys: Option<Vec<ExtraKey>>,
    /// From `--exclude`; replaces the configured exclusion list when set.
    pub exclude: Option<Vec<FilePattern>>,
    /// From `--extend-exclude`; appended to the configured list rather than replacing it.
    pub extend_exclude: Option<Vec<FilePattern>>,
}

/// The part of [`CommonArgs`] that is not a configuration override: where to
/// look for configuration and how to treat an empty notebook set.
///
/// Produced by `CommonArgs::partition`.
#[derive(Clone, Debug, Default)]
pub struct Args {
    /// Explicit path to a configuration file (`--config`), if one was given.
    pub config: Option<PathBuf>,
    /// `--isolated`: ignore all configuration files.
    pub isolated: bool,
    /// `--allow-no-notebooks`: do not return an error if no notebooks are found.
    pub allow_no_notebooks: bool,
}

/// Collapse a pair of opposing command-line switches into a tri-state value.
///
/// * only `yes` set → `Some(true)`
/// * only `no` set → `Some(false)`
/// * neither set → `None` (the user expressed no preference)
///
/// # Panics
///
/// Panics if both `yes` and `no` are `true`. The flag pairs in this file are
/// declared with `overrides_with`, so clap never yields that combination.
pub fn resolve_bool_arg(yes: bool, no: bool) -> Option<bool> {
    if yes && no {
        unreachable!("Clap should make this impossible");
    }
    // At most one switch is set here, so "was anything set?" decides between
    // `Some`/`None`, and `yes` alone carries the resulting value.
    (yes || no).then_some(yes)
}
/// Merge the four ways of choosing a cell-id action into a single value.
///
/// The action may come from `id_action` directly or from one of the shorthand
/// switches `keep`, `sequential` and `drop`. Returns `None` when none of the
/// four was supplied.
///
/// # Panics
///
/// Panics if more than one of the four sources is set. The options are
/// declared as mutually overriding in clap, so that should never be observed.
fn resolve_id_action(
    id_action: Option<IdAction>,
    keep: bool,
    sequential: bool,
    drop: bool,
) -> Option<IdAction> {
    let switches_set = usize::from(keep) + usize::from(sequential) + usize::from(drop);
    let sources_set = switches_set + usize::from(id_action.is_some());
    if sources_set > 1 {
        unreachable!("Clap should make this impossible");
    }

    // At most one source is set from here on.
    if keep {
        Some(IdAction::Keep)
    } else if sequential {
        Some(IdAction::Sequential)
    } else if drop {
        Some(IdAction::Drop)
    } else {
        id_action
    }
}
impl CommonArgs {
    /// Consume the parsed common arguments and split them in two:
    ///
    /// * [`Args`] — the config-file location and the two plain switches
    ///   (`isolated`, `allow_no_notebooks`);
    /// * [`ConfigOverrides`] — every setting that can override a configuration
    ///   value, with each on/off flag pair folded into an `Option<bool>` and the
    ///   four id options folded into an `Option<IdAction>`.
    ///
    /// # Panics
    ///
    /// Panics (via the resolver helpers) if both flags of a pair, or more than
    /// one id option, are set at once — which clap's `overrides_with`
    /// declarations are expected to rule out.
    pub fn partition(self) -> (Args, ConfigOverrides) {
        // Exhaustive destructuring: adding a field to `CommonArgs` without
        // handling it here becomes a compile error.
        let Self {
            config,
            isolated,
            allow_no_notebooks,
            extra_keys,
            drop_empty_cells,
            keep_empty_cells,
            keep_output,
            drop_output,
            keep_count,
            drop_count,
            drop_id,
            keep_id,
            sequential_id,
            id_action,
            strip_init_cell,
            keep_init_cell,
            strip_kernel_info,
            keep_kernel_info,
            drop_tagged_cells,
            keep_keys,
            exclude,
            extend_exclude,
        } = self;

        let args = Args {
            config,
            isolated,
            allow_no_notebooks,
        };

        let overrides = ConfigOverrides {
            strip_kernel_info: resolve_bool_arg(strip_kernel_info, keep_kernel_info),
            extra_keys,
            drop_empty_cells: resolve_bool_arg(drop_empty_cells, keep_empty_cells),
            drop_output: resolve_bool_arg(drop_output, keep_output),
            drop_count: resolve_bool_arg(drop_count, keep_count),
            id_action: resolve_id_action(id_action, keep_id, sequential_id, drop_id),
            strip_init_cell: resolve_bool_arg(strip_init_cell, keep_init_cell),
            drop_tagged_cells,
            keep_keys,
            exclude,
            extend_exclude,
        };

        (args, overrides)
    }
}

impl ConfigOverrides {
    /// Apply these command-line overrides on top of `config` and return the
    /// result.
    ///
    /// A field that is `None` here leaves the matching configuration value
    /// untouched; a `Some` value replaces it. The one exception is
    /// `extend_exclude`, whose patterns are appended to the ones already in
    /// `config` instead of replacing them.
    pub fn override_config(&self, mut config: Configuration) -> Configuration {
        // `Copy` settings: take the override when present, else keep what
        // the configuration already had.
        config.drop_count = self.drop_count.or(config.drop_count);
        config.drop_empty_cells = self.drop_empty_cells.or(config.drop_empty_cells);
        config.drop_output = self.drop_output.or(config.drop_output);
        config.id_action = self.id_action.or(config.id_action);
        config.strip_init_cell = self.strip_init_cell.or(config.strip_init_cell);
        config.strip_kernel_info = self.strip_kernel_info.or(config.strip_kernel_info);

        // List settings: replace wholesale, but only when an override exists.
        if self.extra_keys.is_some() {
            config.extra_keys.clone_from(&self.extra_keys);
        }
        if self.drop_tagged_cells.is_some() {
            config.drop_tagged_cells.clone_from(&self.drop_tagged_cells);
        }
        if self.keep_keys.is_some() {
            config.keep_keys.clone_from(&self.keep_keys);
        }
        if self.exclude.is_some() {
            config.exclude.clone_from(&self.exclude);
        }

        // Additive setting: extra patterns are appended, never substituted.
        if let Some(additional) = &self.extend_exclude {
            config.extend_exclude.extend(additional.clone());
        }

        config
    }
}