use clap::{Parser, Subcommand, ValueEnum};
use std::collections::HashSet;
use std::io::{self, Read, Write};
use std::path::{Path, PathBuf};
use crate::{CleanOptions, NotebookFormat};
/// Process exit codes reported by `run`.
pub mod exit_codes {
    /// The command completed without error.
    pub const SUCCESS: i32 = 0;

    /// The input notebook could not be parsed.
    pub const PARSE_ERROR: i32 = 1;

    /// The notebook could not be serialized to the output format.
    pub const SERIALIZE_ERROR: i32 = 2;

    /// Reading from or writing to a file or stream failed.
    pub const IO_ERROR: i32 = 3;

    /// The command-line arguments were rejected.
    pub const INVALID_ARGS: i32 = 4;
}
// Top-level argument parser for the `nbx` command.
//
// Plain `//` comments are used on purpose: clap's derive macros turn `///` doc
// comments into help text, which would change the CLI's `--help` output.
#[derive(Parser)]
#[command(name = "nbx", author, version, about, long_about = None)]
struct Cli {
    // The subcommand selected on the command line.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands understood by `nbx`.
//
// Plain `//` comments are used on purpose: clap's derive macros turn `///` doc
// comments into help text, which would change the CLI's `--help` output.
#[derive(Subcommand)]
enum Commands {
    // Convert a notebook from one format to another.
    Convert {
        // Notebook to read; `-` reads from stdin.
        input: String,

        // Destination to write; `-` writes to stdout.
        #[arg(short, long)]
        output: String,

        // Explicit input format; when absent it is inferred from `input`.
        #[arg(long, value_name = "FORMAT")]
        from_fmt: Option<Format>,

        // Explicit output format; when absent it is inferred from `output`.
        #[arg(long, value_name = "FORMAT")]
        to_fmt: Option<Format>,

        // Enables `remove_outputs` and `remove_execution_counts` before writing.
        #[arg(long)]
        strip_outputs: bool,

        // Enables `remove_cell_metadata` and `remove_notebook_metadata` before writing.
        #[arg(long)]
        strip_metadata: bool,

        // Serializes with the header flag turned off.
        #[arg(long)]
        no_header: bool,
    },

    // Clean a notebook, keeping its format.
    Clean {
        // Notebook file to clean; its format is inferred from the path.
        input: PathBuf,

        // File to write the result to. Without this and without `--in-place`
        // the result goes to stdout.
        #[arg(short, long)]
        output: Option<PathBuf>,

        // Overwrite `input` with the cleaned notebook.
        #[arg(short, long, conflicts_with = "output")]
        in_place: bool,

        // Forwarded to `CleanOptions::remove_outputs`.
        #[arg(short = 'O', long)]
        remove_outputs: bool,

        // Forwarded to `CleanOptions::remove_execution_counts`.
        #[arg(short = 'e', long)]
        remove_execution_counts: bool,

        // Forwarded to `CleanOptions::remove_cell_metadata`.
        #[arg(long)]
        remove_cell_metadata: bool,

        // Forwarded to `CleanOptions::remove_notebook_metadata`.
        #[arg(long)]
        remove_notebook_metadata: bool,

        // Forwarded to `CleanOptions::remove_kernel_info`.
        #[arg(long)]
        remove_kernel_info: bool,

        // Forwarded to `CleanOptions::preserve_cell_ids`.
        #[arg(long)]
        preserve_cell_ids: bool,

        // Forwarded to `CleanOptions::remove_output_metadata`.
        #[arg(long)]
        remove_output_metadata: bool,

        // Forwarded to `CleanOptions::remove_output_execution_counts`.
        #[arg(long)]
        remove_output_execution_counts: bool,

        // Forwarded to `CleanOptions::normalize_cell_ids`.
        #[arg(long)]
        normalize_cell_ids: bool,

        // Forwarded to `CleanOptions::sort_keys`.
        #[arg(long)]
        sort_keys: bool,

        // Comma-separated metadata keys; used as the allow-list for both cell
        // and notebook metadata.
        #[arg(long, value_delimiter = ',')]
        keep_only: Option<Vec<String>>,
    },
}
// Notebook formats accepted by `--from-fmt` / `--to-fmt`.
//
// Plain `//` comments are used on purpose: clap's `ValueEnum` derive reads
// `///` doc comments as help text for each possible value.
#[derive(Clone, Copy, PartialEq, Eq, ValueEnum)]
enum Format {
    // Converted to `NotebookFormat::Ipynb`.
    Ipynb,
    // Converted to `NotebookFormat::Percent`.
    Percent,
}
/// Maps the CLI-facing `Format` onto the matching `NotebookFormat` variant.
impl From<Format> for NotebookFormat {
    fn from(format: Format) -> Self {
        match format {
            Format::Ipynb => Self::Ipynb,
            Format::Percent => Self::Percent,
        }
    }
}
pub fn run<I, T>(args: I) -> i32
where
I: IntoIterator<Item = T>,
T: Into<std::ffi::OsString> + Clone,
{
let cli = match Cli::try_parse_from(args) {
Ok(cli) => cli,
Err(e) => {
let _ = e.print();
return if e.kind() == clap::error::ErrorKind::DisplayHelp
|| e.kind() == clap::error::ErrorKind::DisplayVersion
{
exit_codes::SUCCESS
} else {
exit_codes::INVALID_ARGS
};
}
};
let result = match cli.command {
Commands::Convert {
input,
output,
from_fmt,
to_fmt,
strip_outputs,
strip_metadata,
no_header,
} => run_convert(
&input,
&output,
from_fmt,
to_fmt,
strip_outputs,
strip_metadata,
no_header,
),
Commands::Clean {
input,
output,
in_place,
remove_outputs,
remove_execution_counts,
remove_cell_metadata,
remove_notebook_metadata,
remove_kernel_info,
preserve_cell_ids,
remove_output_metadata,
remove_output_execution_counts,
normalize_cell_ids,
sort_keys,
keep_only,
} => run_clean(
&input,
output.as_deref(),
in_place,
remove_outputs,
remove_execution_counts,
remove_cell_metadata,
remove_notebook_metadata,
remove_kernel_info,
preserve_cell_ids,
remove_output_metadata,
remove_output_execution_counts,
normalize_cell_ids,
sort_keys,
keep_only,
),
};
match result {
Ok(()) => exit_codes::SUCCESS,
Err(e) => {
eprintln!("Error: {}", e);
e.exit_code()
}
}
}
#[derive(Debug, thiserror::Error)]
enum CliError {
#[error("Failed to parse '{path}': {message}")]
Parse { path: String, message: String },
#[error("Failed to serialize notebook: {0}")]
Serialize(String),
#[error("I/O error: {0}")]
Io(#[from] io::Error),
#[error("{0}")]
InvalidArgs(String),
}
impl CliError {
    /// Returns the `exit_codes` constant that corresponds to this error.
    fn exit_code(&self) -> i32 {
        match self {
            Self::Parse { .. } => exit_codes::PARSE_ERROR,
            Self::Serialize(_) => exit_codes::SERIALIZE_ERROR,
            Self::Io(_) => exit_codes::IO_ERROR,
            Self::InvalidArgs(_) => exit_codes::INVALID_ARGS,
        }
    }
}
/// Determines which notebook format applies to `path`.
///
/// An explicit format always wins. Otherwise the format is looked up from the
/// path via `NotebookFormat::from_path`.
///
/// # Errors
///
/// Returns `CliError::InvalidArgs` when no explicit format is given and
/// either `path` is `-` (stdin/stdout) or `NotebookFormat::from_path` returns
/// `None` for it.
fn infer_format(path: &str, explicit_format: Option<Format>) -> Result<NotebookFormat, CliError> {
    match explicit_format {
        Some(chosen) => Ok(chosen.into()),
        // `-` stands for a stream, which carries no path to infer from.
        None if path == "-" => Err(CliError::InvalidArgs(
            "Cannot infer format for stdin/stdout. Use --from-fmt or --to-fmt.".to_string(),
        )),
        None => NotebookFormat::from_path(Path::new(path)).ok_or_else(|| {
            CliError::InvalidArgs(format!(
                "Cannot infer format from '{}'. Use --from-fmt or --to-fmt.",
                path
            ))
        }),
    }
}
fn read_content(path: &str) -> Result<String, CliError> {
if path == "-" {
let mut content = String::new();
io::stdin().read_to_string(&mut content)?;
Ok(content)
} else {
std::fs::read_to_string(path).map_err(|e| {
if e.kind() == io::ErrorKind::NotFound {
CliError::Io(io::Error::new(
io::ErrorKind::NotFound,
format!("File not found: {}", path),
))
} else {
CliError::Io(e)
}
})
}
}
/// Writes `content` to `path`, or to stdout when `path` is `-`.
///
/// # Errors
///
/// Returns `CliError::Io` if the file write, the stdout write, or the stdout
/// flush fails.
fn write_content(path: &str, content: &str) -> Result<(), CliError> {
    if path == "-" {
        let stdout = io::stdout();
        let mut handle = stdout.lock();
        handle.write_all(content.as_bytes())?;
        // Stdout is line-buffered: content without a trailing newline would
        // otherwise sit in the buffer until process exit, where a failed write
        // is silently ignored. Flushing here surfaces that failure as an error.
        handle.flush()?;
    } else {
        std::fs::write(path, content)?;
    }
    Ok(())
}
/// Implements the `convert` subcommand: reads a notebook, optionally strips
/// parts of it, and writes it in the target format.
///
/// * `input` / `output` — file paths, or `-` for stdin / stdout.
/// * `from_fmt` / `to_fmt` — explicit formats; when `None` the format is
///   inferred from the corresponding path.
/// * `strip_outputs` — enables `remove_outputs` and `remove_execution_counts`.
/// * `strip_metadata` — enables `remove_cell_metadata` and
///   `remove_notebook_metadata`.
/// * `no_header` — when set, `false` is passed as the header flag to
///   `serialize_with_header`.
///
/// # Errors
///
/// * `CliError::InvalidArgs` if a format cannot be determined.
/// * `CliError::Io` if reading or writing fails.
/// * `CliError::Parse` if the input cannot be parsed.
/// * `CliError::Serialize` if the notebook cannot be serialized.
fn run_convert(
    input: &str,
    output: &str,
    from_fmt: Option<Format>,
    to_fmt: Option<Format>,
    strip_outputs: bool,
    strip_metadata: bool,
    no_header: bool,
) -> Result<(), CliError> {
    // Resolve both formats before touching any input so that unusable
    // arguments are rejected without performing I/O.
    let input_format = infer_format(input, from_fmt)?;
    let output_format = infer_format(output, to_fmt)?;

    let content = read_content(input)?;
    let notebook = input_format.parse(&content).map_err(|e| CliError::Parse {
        path: input.to_string(),
        message: e.to_string(),
    })?;

    // Only run the cleaner when at least one strip flag was requested.
    let notebook = if strip_outputs || strip_metadata {
        let options = CleanOptions {
            remove_outputs: strip_outputs,
            remove_execution_counts: strip_outputs,
            remove_cell_metadata: strip_metadata,
            remove_notebook_metadata: strip_metadata,
            ..Default::default()
        };
        notebook.clean(&options)
    } else {
        notebook
    };

    let output_content = output_format
        .serialize_with_header(&notebook, !no_header)
        .map_err(|e| CliError::Serialize(e.to_string()))?;

    write_content(output, &output_content)
}
/// Implements the `clean` subcommand: reads the notebook at `input`, cleans
/// it, and writes it back out in the same format.
///
/// The format is inferred from `input` via `NotebookFormat::from_path`. The
/// boolean flags are copied unchanged into the same-named `CleanOptions`
/// fields. `keep_only`, when present, becomes the allow-list for both cell and
/// notebook metadata keys.
///
/// The destination is chosen in this order: `input` itself when `in_place` is
/// set, otherwise `output` when given, otherwise stdout.
///
/// # Errors
///
/// * `CliError::InvalidArgs` if the format cannot be inferred from `input`.
/// * `CliError::Io` if reading or writing fails; a missing input file is
///   reported as `File not found: <path>`.
/// * `CliError::Parse` if the input cannot be parsed.
/// * `CliError::Serialize` if the cleaned notebook cannot be serialized.
// The parameters mirror the `clean` subcommand's flags one-to-one, hence the
// lint exemption.
#[allow(clippy::too_many_arguments)]
fn run_clean(
    input: &Path,
    output: Option<&Path>,
    in_place: bool,
    remove_outputs: bool,
    remove_execution_counts: bool,
    remove_cell_metadata: bool,
    remove_notebook_metadata: bool,
    remove_kernel_info: bool,
    preserve_cell_ids: bool,
    remove_output_metadata: bool,
    remove_output_execution_counts: bool,
    normalize_cell_ids: bool,
    sort_keys: bool,
    keep_only: Option<Vec<String>>,
) -> Result<(), CliError> {
    let format = NotebookFormat::from_path(input).ok_or_else(|| {
        CliError::InvalidArgs(format!(
            "Cannot infer format from '{}'. Supported extensions: .ipynb, .pct.py",
            input.display()
        ))
    })?;

    let content = std::fs::read_to_string(input).map_err(|e| {
        if e.kind() == io::ErrorKind::NotFound {
            // Replace the OS message with one that names the missing path.
            CliError::Io(io::Error::new(
                io::ErrorKind::NotFound,
                format!("File not found: {}", input.display()),
            ))
        } else {
            CliError::Io(e)
        }
    })?;

    let notebook = format.parse(&content).map_err(|e| CliError::Parse {
        path: input.display().to_string(),
        message: e.to_string(),
    })?;

    // The same allow-list applies to cell-level and notebook-level metadata.
    let allowed_keys = keep_only.map(|keys| keys.into_iter().collect::<HashSet<_>>());
    let options = CleanOptions {
        remove_outputs,
        remove_execution_counts,
        remove_cell_metadata,
        remove_notebook_metadata,
        remove_kernel_info,
        preserve_cell_ids,
        remove_output_metadata,
        remove_output_execution_counts,
        normalize_cell_ids,
        sort_keys,
        allowed_cell_metadata_keys: allowed_keys.clone(),
        allowed_notebook_metadata_keys: allowed_keys,
    };

    let cleaned = notebook.clean(&options);
    let output_content = format
        .serialize(&cleaned)
        .map_err(|e| CliError::Serialize(e.to_string()))?;

    if in_place {
        std::fs::write(input, &output_content)?;
    } else if let Some(output_path) = output {
        std::fs::write(output_path, &output_content)?;
    } else {
        let stdout = io::stdout();
        let mut handle = stdout.lock();
        handle.write_all(output_content.as_bytes())?;
        // Stdout is line-buffered: output without a trailing newline would
        // otherwise sit in the buffer until process exit, where a failed write
        // is silently ignored. Flushing here surfaces that failure as an error.
        handle.flush()?;
    }
    Ok(())
}