use std::{
fs::File,
io::{stdout, Write},
path::{Path, PathBuf},
};
use anyhow::{anyhow, bail, Error as AnyError, Result};
use chrono::{Datelike, NaiveTime, Timelike};
use clap::{Args, ValueEnum};
use csv::Writer;
use encoding_rs::Encoding;
use pspp::{
calendar::calendar_offset_to_gregorian,
data::{ByteString, Case, Datum, WithEncoding},
file::FileType,
format::{DisplayPlain, Type},
por::PortableFile,
sys::{raw::records::Compression, ReadOptions, WriteOptions},
util::ToSmallString,
variable::Variable,
};
use crate::parse_encoding;
#[derive(Args, Clone, Debug)]
pub struct Convert {
input: PathBuf,
output: Option<PathBuf>,
#[arg(short = 'O')]
output_format: Option<OutputFormat>,
#[arg(short = 'e', long, value_parser = parse_encoding)]
encoding: Option<&'static Encoding>,
#[clap(short, long)]
password: Option<String>,
#[arg(short = 'c', long = "cases")]
max_cases: Option<usize>,
#[command(flatten, next_help_heading = "Options for CSV output")]
csv_options: CsvOptions,
#[command(flatten, next_help_heading = "Options for system file output")]
sys_options: SysOptions,
}
#[derive(Args, Clone, Debug)]
struct CsvOptions {
#[arg(long)]
no_var_names: bool,
#[arg(long)]
recode: bool,
#[arg(long)]
labels: bool,
#[arg(long)]
print_formats: bool,
#[arg(long, default_value_t = '.')]
decimal: char,
#[arg(long)]
delimiter: Option<char>,
#[arg(long, default_value_t = '"')]
qualifier: char,
}
impl CsvOptions {
fn write_field<W>(
&self,
datum: &Datum<WithEncoding<ByteString>>,
variable: &Variable,
writer: &mut Writer<W>,
) -> csv::Result<()>
where
W: Write,
{
if self.labels
&& let Some(label) = variable.value_labels.get(datum)
{
writer.write_field(label)
} else if datum.is_sysmis() {
writer.write_field(" ")
} else if self.print_formats || datum.is_string() {
writer.write_field(
datum
.display(variable.print_format)
.with_trimming()
.to_small_string::<64>(),
)
} else {
let number = datum.as_number().unwrap().unwrap();
match variable.print_format.type_() {
Type::F
| Type::Comma
| Type::Dot
| Type::Dollar
| Type::Pct
| Type::E
| Type::CC(_)
| Type::N
| Type::Z
| Type::P
| Type::PK
| Type::IB
| Type::PIB
| Type::PIBHex
| Type::RB
| Type::RBHex
| Type::WkDay
| Type::Month => writer.write_field(
number
.display_plain()
.with_decimal(self.decimal)
.to_small_string::<64>(),
),
Type::Date
| Type::ADate
| Type::EDate
| Type::JDate
| Type::SDate
| Type::QYr
| Type::MoYr
| Type::WkYr => {
if number >= 0.0
&& let Some(date) =
calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0)
{
writer.write_field(
format_args!(
"{:02}/{:02}/{:04}",
date.month(),
date.day(),
date.year()
)
.to_small_string::<64>(),
)
} else {
writer.write_field(" ")
}
}
Type::DateTime | Type::YmdHms => {
if number >= 0.0
&& let Some(date) =
calendar_offset_to_gregorian(number / 60.0 / 60.0 / 24.0)
&& let Some(time) = NaiveTime::from_num_seconds_from_midnight_opt(
(number % (60.0 * 60.0 * 24.0)) as u32,
0,
)
{
writer.write_field(
format_args!(
"{:02}/{:02}/{:04} {:02}:{:02}:{:02}",
date.month(),
date.day(),
date.year(),
time.hour(),
time.minute(),
time.second()
)
.to_small_string::<64>(),
)
} else {
writer.write_field(" ")
}
}
Type::MTime | Type::Time | Type::DTime => {
if let Some(time) =
NaiveTime::from_num_seconds_from_midnight_opt(number.abs() as u32, 0)
{
writer.write_field(
format_args!(
"{}{:02}:{:02}:{:02}",
if number.is_sign_negative() { "-" } else { "" },
time.hour(),
time.minute(),
time.second()
)
.to_small_string::<64>(),
)
} else {
writer.write_field(" ")
}
}
Type::A | Type::AHex => unreachable!(),
}
}
}
}
#[derive(Args, Clone, Debug)]
struct SysOptions {
/// Write the output file with Unicode (UTF-8) encoding.
///
/// If the input was not already encoded in Unicode, this triples the width
/// of string variables.
#[arg(long = "unicode")]
to_unicode: bool,
/// How to compress data in the system file.
#[arg(long, default_value = "simple")]
compression: Option<Compression>,
}
/// Output file format.
#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum)]
enum OutputFormat {
/// Comma-separated values using each variable's print format (variable
/// names are written as the first line)
Csv,
/// System file
Sys,
/// Portable file
Por,
}
impl TryFrom<&Path> for OutputFormat {
type Error = AnyError;
fn try_from(value: &Path) -> std::result::Result<Self, Self::Error> {
let extension = value.extension().unwrap_or_default();
if extension.eq_ignore_ascii_case("csv") || extension.eq_ignore_ascii_case("txt") {
Ok(OutputFormat::Csv)
} else if extension.eq_ignore_ascii_case("sav") || extension.eq_ignore_ascii_case("sys") {
Ok(OutputFormat::Sys)
} else if extension.eq_ignore_ascii_case("por") {
Ok(OutputFormat::Por)
} else {
Err(anyhow!(
"Unknown output file extension '{}'",
extension.display()
))
}
}
}
impl Convert {
pub fn run(self) -> Result<()> {
let output_format = match self.output_format {
Some(format) => format,
None => match &self.output {
Some(output) => output.as_path().try_into()?,
_ => OutputFormat::Csv,
},
};
let (dictionary, cases) = match FileType::from_file(&self.input)? {
Some(FileType::System { .. }) => {
fn warn(warning: anyhow::Error) {
eprintln!("warning: {warning}");
}
let mut system_file = ReadOptions::new(warn)
.with_encoding(self.encoding)
.with_password(self.password.clone())
.open_file(&self.input)?;
if output_format == OutputFormat::Sys && self.sys_options.to_unicode {
system_file = system_file.into_unicode();
}
let (dictionary, _, cases) = system_file.into_parts();
let cases = cases.map(|result| result.map_err(AnyError::from));
let cases = Box::new(cases)
as Box<dyn Iterator<Item = Result<Case<Vec<Datum<ByteString>>>, AnyError>>>;
(dictionary, cases)
}
Some(FileType::Portable) => {
fn warn_portable(warning: pspp::por::Warning) {
eprintln!("warning: {warning}");
}
let portable_file = PortableFile::open_file(&self.input, warn_portable)?;
let (dictionary, _, cases) = portable_file.into_parts();
let cases = cases.map(|result| result.map_err(AnyError::from));
let cases = Box::new(cases)
as Box<dyn Iterator<Item = Result<Case<Vec<Datum<ByteString>>>, AnyError>>>;
(dictionary, cases)
}
_ => bail!("{}: not a system or portable file", self.input.display()),
};
// Take only the first `self.max_cases` cases.
let cases = cases.take(self.max_cases.unwrap_or(usize::MAX));
match output_format {
OutputFormat::Csv => {
let writer = match self.output {
Some(path) => Box::new(File::create(path)?) as Box<dyn Write>,
None => Box::new(stdout()),
};
let decimal: u8 = self.csv_options.decimal.try_into()?;
let delimiter: u8 = match self.csv_options.delimiter {
Some(delimiter) => delimiter.try_into()?,
None if decimal != b',' => b',',
None => b';',
};
let qualifier: u8 = self.csv_options.qualifier.try_into()?;
let mut output = csv::WriterBuilder::new()
.delimiter(delimiter)
.quote(qualifier)
.from_writer(writer);
if !self.csv_options.no_var_names {
output
.write_record(dictionary.variables.iter().map(|var| var.name.as_str()))?;
}
for case in cases {
for (datum, variable) in case?.into_iter().zip(dictionary.variables.iter()) {
self.csv_options
.write_field(&datum, variable, &mut output)?;
}
output.write_record(None::<&[u8]>)?;
}
}
OutputFormat::Sys => {
let Some(output) = &self.output else {
bail!("output file name must be specified for output to a system file")
};
let mut output = WriteOptions::new()
.with_compression(self.sys_options.compression)
.write_file(&dictionary, output)?;
for case in cases {
output.write_case(case?)?;
}
}
OutputFormat::Por => {
let Some(output) = &self.output else {
bail!("output file name must be specified for output to a portable file")
};
let mut output = pspp::por::WriteOptions::new().write_file(&dictionary, output)?;
for case in cases {
output.write_case(case?)?;
}
}
}
Ok(())
}
}