use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use anyhow::Context;
use clap::Args;
use biodream::{CsvOptions, ReadOptions};
// Output formats the `convert` command can write.
//
// The `clap::ValueEnum` derive makes the variants selectable through `--format`.
// Plain `//` comments are used here on purpose: clap's derive macros can surface
// `///` doc comments as help text, which would change the CLI's `--help` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum)]
pub enum OutputFormat {
// Written with `biodream::to_csv`; inferred from a `.csv` extension and used as
// the fallback when no output path is given.
Csv,
// Arrow IPC, written with `biodream::to_arrow_ipc`; inferred from `.arrow` or `.ipc`.
Arrow,
// Written with `biodream::to_parquet`; inferred from `.parquet` or `.pq`.
Parquet,
}
// Command-line arguments of the `convert` command, consumed by `run`.
//
// Plain `//` comments are used here on purpose: clap's derive macros can surface
// `///` doc comments as help text, which would change the CLI's `--help` output.
#[derive(Debug, Args)]
pub struct ConvertArgs {
// Positional input path. The literal path `-` makes the reader consume
// standard input instead of opening a file.
#[arg(value_name = "FILE")]
pub path: PathBuf,
// `-o` / `--output`: destination file. When omitted, the path is derived from
// the input file name and the resolved format; it is mandatory when the input
// is `-` (stdin).
#[arg(short, long, value_name = "OUTPUT")]
pub output: Option<PathBuf>,
// `-F` / `--format`: explicit output format. When omitted, the format is
// inferred from the output (or, failing that, input) file extension.
#[arg(short = 'F', long, value_name = "FORMAT")]
pub format: Option<OutputFormat>,
// `--channels`: comma-separated list of channel indices, forwarded to
// `ReadOptions::channels`.
// NOTE(review): whether indices are 0- or 1-based is decided by biodream — confirm.
#[arg(long, value_delimiter = ',', value_name = "INDEX")]
pub channels: Option<Vec<usize>>,
// `--scaled`: boolean flag forwarded to `ReadOptions::scaled`.
// NOTE(review): presumably selects scaled sample values instead of raw ones —
// confirm against the biodream documentation.
#[arg(long)]
pub scaled: bool,
}
pub fn run(args: ConvertArgs) -> anyhow::Result<()> {
let format = resolve_format(args.format, &args.path, args.output.as_deref())?;
let output_path = resolve_output_path(args.output, &args.path, format)?;
let result = read_with_options(&args.path, args.channels.as_deref(), args.scaled)?;
let df = result.value;
let file = File::create(&output_path)
.with_context(|| format!("failed to create {}", output_path.display()))?;
write_output(format, &df, file)
}
/// Reads and parses the input, either from a file or from standard input.
///
/// * `path` - input location; the literal path `-` selects standard input.
/// * `channel_indices` - when `Some`, forwarded to `ReadOptions::channels`; when `None`,
///   the reader's own channel selection is left untouched.
/// * `scaled` - forwarded to `ReadOptions::scaled`.
///
/// # Errors
///
/// Returns an error if the file or stdin cannot be read, or if parsing fails.
fn read_with_options(
    path: &Path,
    channel_indices: Option<&[usize]>,
    scaled: bool,
) -> anyhow::Result<biodream::ParseResult<biodream::Datafile>> {
    use std::io::Read;

    let mut reader_opts = ReadOptions::new().scaled(scaled);
    if let Some(wanted) = channel_indices {
        reader_opts = reader_opts.channels(wanted);
    }

    // Regular file: hand the path to the reader.
    if path != Path::new("-") {
        return reader_opts
            .read_file(path)
            .with_context(|| format!("failed to read {}", path.display()));
    }

    // `-` means stdin: the whole stream is buffered in memory before parsing.
    let mut buffer = Vec::new();
    std::io::stdin()
        .read_to_end(&mut buffer)
        .context("failed to read from stdin")?;
    reader_opts
        .read_bytes(&buffer)
        .context("failed to parse .acq from stdin")
}
/// Decides which output format to use.
///
/// Resolution order:
/// 1. `explicit`, when given, always wins.
/// 2. Otherwise the extension of `output` (or of `input` when no output path was given)
///    is matched case-insensitively: `csv`; `arrow` / `ipc`; `parquet` / `pq`.
/// 3. With no output path and no recognised extension, CSV is the default.
///
/// NOTE(review): when `output` is `None`, a recognised extension on the *input* path
/// selects that format (e.g. an input named `x.parquet` yields Parquet) — confirm this
/// is intended.
///
/// # Errors
///
/// Returns an error when an output path was given but its extension is missing,
/// not valid UTF-8, or not one of the recognised extensions.
pub fn resolve_format(
    explicit: Option<OutputFormat>,
    input: &Path,
    output: Option<&Path>,
) -> anyhow::Result<OutputFormat> {
    if let Some(format) = explicit {
        return Ok(format);
    }

    // `None` covers a missing extension, a non-UTF-8 one, and an empty one (`out.`).
    let ext = output
        .unwrap_or(input)
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase)
        .filter(|e| !e.is_empty());

    match ext.as_deref() {
        Some("csv") => Ok(OutputFormat::Csv),
        Some("arrow" | "ipc") => Ok(OutputFormat::Arrow),
        Some("parquet" | "pq") => Ok(OutputFormat::Parquet),
        // No output path to take a hint from: fall back to CSV.
        _ if output.is_none() => Ok(OutputFormat::Csv),
        Some(other) => anyhow::bail!(
            "cannot infer output format from extension '.{other}'; \
             use --format csv|arrow|parquet"
        ),
        // Previously this case reported the confusing "extension '.'".
        None => anyhow::bail!(
            "cannot infer output format: output path has no recognizable extension; \
             use --format csv|arrow|parquet"
        ),
    }
}
/// Determines where the converted data is written.
///
/// An explicit path is returned unchanged. Otherwise the path is built from the input's
/// file prefix (the part of the file name before its first `.`, so `my.data.acq` yields
/// `my`) plus the extension matching `format`. The derived path carries no directory
/// component, so it is relative to the current working directory.
///
/// # Errors
///
/// Returns an error when no explicit path is given and the input is `-` (stdin), since
/// there is no file name to derive the output from.
fn resolve_output_path(
    explicit: Option<PathBuf>,
    input: &Path,
    format: OutputFormat,
) -> anyhow::Result<PathBuf> {
    match explicit {
        Some(chosen) => Ok(chosen),
        None if input == Path::new("-") => {
            anyhow::bail!("--output is required when reading from stdin")
        }
        None => {
            let extension = match format {
                OutputFormat::Csv => "csv",
                OutputFormat::Arrow => "arrow",
                OutputFormat::Parquet => "parquet",
            };
            // Inputs without a file name (e.g. `..`) fall back to a generic base name.
            let base = match input.file_prefix() {
                Some(prefix) => prefix,
                None => std::ffi::OsStr::new("output"),
            };
            Ok(PathBuf::from(base).with_extension(extension))
        }
    }
}
/// Writes `df` to `file` in the requested `format`.
///
/// The file is wrapped in a `BufWriter` for every format. CSV is exported and flushed
/// here; Arrow and Parquet are delegated to `write_arrow` / `write_parquet`, which take
/// ownership of the buffered writer.
///
/// # Errors
///
/// Returns an error if the export or the final flush fails, or if the selected format's
/// support was not compiled in.
fn write_output(format: OutputFormat, df: &biodream::Datafile, file: File) -> anyhow::Result<()> {
    let mut sink = BufWriter::new(file);
    match format {
        OutputFormat::Arrow => write_arrow(df, sink),
        OutputFormat::Parquet => write_parquet(df, sink),
        OutputFormat::Csv => {
            let csv_options = CsvOptions::default();
            biodream::to_csv(df, &mut sink, &csv_options).context("CSV export failed")?;
            // Flush explicitly: errors during `BufWriter`'s drop would be silently lost.
            sink.flush().context("flush failed")
        }
    }
}
/// Exports `df` as Arrow IPC into `writer`, then flushes it.
///
/// Compiled when either the `arrow` or the `parquet` feature is enabled.
///
/// # Errors
///
/// Returns an error if the export or the flush fails.
#[cfg(any(feature = "arrow", feature = "parquet"))]
fn write_arrow<W: Write>(df: &biodream::Datafile, mut writer: W) -> anyhow::Result<()> {
    let exported = biodream::to_arrow_ipc(df, &mut writer);
    exported.context("Arrow IPC export failed")?;
    // Flush explicitly so write errors surface instead of being dropped with the writer.
    writer.flush().context("flush failed")
}
/// Fallback used when neither the `arrow` nor the `parquet` feature is enabled.
///
/// # Errors
///
/// Always returns an error telling the user how to rebuild with Arrow support.
#[cfg(not(any(feature = "arrow", feature = "parquet")))]
fn write_arrow<W: Write>(_df: &biodream::Datafile, _writer: W) -> anyhow::Result<()> {
    Err(anyhow::anyhow!(
        "Arrow IPC export requires the 'arrow' feature; \
         recompile with: cargo build --features arrow"
    ))
}
/// Exports `df` as Parquet into `writer` using the default `ParquetOptions`.
///
/// Compiled only when the `parquet` feature is enabled.
///
/// NOTE(review): unlike the CSV and Arrow paths there is no explicit flush here, because
/// the writer is handed over by value — presumably `biodream::to_parquet` finalises and
/// flushes it; confirm.
///
/// # Errors
///
/// Returns an error if the export fails.
#[cfg(feature = "parquet")]
fn write_parquet<W: Write + Send>(df: &biodream::Datafile, writer: W) -> anyhow::Result<()> {
    let parquet_options = biodream::ParquetOptions::default();
    biodream::to_parquet(df, writer, &parquet_options).context("Parquet export failed")
}
/// Fallback used when the `parquet` feature is not enabled.
///
/// # Errors
///
/// Always returns an error telling the user how to rebuild with Parquet support.
#[cfg(not(feature = "parquet"))]
fn write_parquet<W: Write + Send>(_df: &biodream::Datafile, _writer: W) -> anyhow::Result<()> {
    Err(anyhow::anyhow!(
        "Parquet export requires the 'parquet' feature; \
         recompile with: cargo build --features parquet"
    ))
}
/// Unit tests for format inference (`resolve_format`) and output-path derivation
/// (`resolve_output_path`). No files are touched; only path logic is exercised.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // ---- resolve_format -------------------------------------------------------------

    #[test]
    fn resolve_format_explicit_csv() -> anyhow::Result<()> {
        let f = resolve_format(Some(OutputFormat::Csv), Path::new("a.acq"), None)?;
        assert_eq!(f, OutputFormat::Csv);
        Ok(())
    }

    /// An explicit `--format` wins even when the output extension says otherwise.
    #[test]
    fn resolve_format_explicit_overrides_output_extension() -> anyhow::Result<()> {
        let f = resolve_format(
            Some(OutputFormat::Arrow),
            Path::new("a.acq"),
            Some(Path::new("out.csv")),
        )?;
        assert_eq!(f, OutputFormat::Arrow);
        Ok(())
    }

    #[test]
    fn resolve_format_from_output_extension() -> anyhow::Result<()> {
        let f = resolve_format(None, Path::new("a.acq"), Some(Path::new("out.parquet")))?;
        assert_eq!(f, OutputFormat::Parquet);
        Ok(())
    }

    #[test]
    fn resolve_format_arrow_from_ipc_extension() -> anyhow::Result<()> {
        let f = resolve_format(None, Path::new("a.acq"), Some(Path::new("out.ipc")))?;
        assert_eq!(f, OutputFormat::Arrow);
        Ok(())
    }

    /// Every recognised extension alias maps to its format.
    #[test]
    fn resolve_format_recognises_all_extension_aliases() -> anyhow::Result<()> {
        let cases = [
            ("out.csv", OutputFormat::Csv),
            ("out.arrow", OutputFormat::Arrow),
            ("out.ipc", OutputFormat::Arrow),
            ("out.parquet", OutputFormat::Parquet),
            ("out.pq", OutputFormat::Parquet),
        ];
        for (name, expected) in cases {
            let f = resolve_format(None, Path::new("a.acq"), Some(Path::new(name)))?;
            assert_eq!(f, expected, "extension of {name}");
        }
        Ok(())
    }

    /// Extension matching ignores ASCII case.
    #[test]
    fn resolve_format_extension_is_case_insensitive() -> anyhow::Result<()> {
        let f = resolve_format(None, Path::new("a.acq"), Some(Path::new("OUT.PARQUET")))?;
        assert_eq!(f, OutputFormat::Parquet);
        Ok(())
    }

    #[test]
    fn resolve_format_defaults_to_csv_when_no_output() -> anyhow::Result<()> {
        let f = resolve_format(None, Path::new("data.acq"), None)?;
        assert_eq!(f, OutputFormat::Csv);
        Ok(())
    }

    #[test]
    fn resolve_format_unknown_extension_returns_error() {
        let r = resolve_format(None, Path::new("a.acq"), Some(Path::new("out.xyz")));
        assert!(r.is_err());
    }

    /// An output path without any extension gives nothing to infer from.
    #[test]
    fn resolve_format_output_without_extension_returns_error() {
        let r = resolve_format(None, Path::new("a.acq"), Some(Path::new("out")));
        assert!(r.is_err());
    }

    // ---- resolve_output_path --------------------------------------------------------

    /// An explicit output path is returned untouched, even for stdin input.
    #[test]
    fn resolve_output_path_returns_explicit_path_unchanged() -> anyhow::Result<()> {
        let explicit = PathBuf::from("custom/out.bin");
        let p = resolve_output_path(Some(explicit.clone()), Path::new("-"), OutputFormat::Csv)?;
        assert_eq!(p, explicit);
        Ok(())
    }

    #[test]
    fn resolve_output_path_derives_csv_from_input() -> anyhow::Result<()> {
        let p = resolve_output_path(None, Path::new("data.acq"), OutputFormat::Csv)?;
        assert_eq!(p, PathBuf::from("data.csv"));
        Ok(())
    }

    #[test]
    fn resolve_output_path_derives_arrow_from_input() -> anyhow::Result<()> {
        let p = resolve_output_path(None, Path::new("my.data.acq"), OutputFormat::Arrow)?;
        assert_eq!(p, PathBuf::from("my.arrow"));
        Ok(())
    }

    #[test]
    fn resolve_output_path_derives_parquet_from_input() -> anyhow::Result<()> {
        let p = resolve_output_path(None, Path::new("data.acq"), OutputFormat::Parquet)?;
        assert_eq!(p, PathBuf::from("data.parquet"));
        Ok(())
    }

    #[test]
    fn resolve_output_path_stdin_without_explicit_errors() {
        let r = resolve_output_path(None, Path::new("-"), OutputFormat::Csv);
        assert!(r.is_err());
    }
}