#![deny(
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::indexing_slicing
)]
use crate::error::Result;
pub mod csv_in;
pub mod csv_out;
pub mod infer;
pub mod inspect;
pub mod json_in;
pub mod json_out;
pub mod xml_in;
/// Verification policy carried by [`ImportArgs::verify`].
///
/// NOTE(review): what is verified, and when, is decided by the import code
/// that reads this value; that code is not visible in this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum VerifyPolicy {
/// The default policy.
#[default]
Auto,
/// Presumably "always verify" — TODO confirm against the consumer.
Force,
/// Presumably "never verify" — TODO confirm against the consumer.
Off,
}
/// Binary-value handling option carried by [`ExportArgs::binary`].
///
/// NOTE(review): the variant names suggest how binary fields are rendered on
/// export; the exact behavior lives in the export modules — confirm there.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum BinaryEncoding {
/// The default choice.
#[default]
Base64,
/// Presumably hexadecimal text — TODO confirm against the exporters.
Hex,
/// Presumably omits binary values — TODO confirm against the exporters.
Skip,
}
/// XML attribute handling option carried by [`ImportArgs::xml_attrs`].
///
/// NOTE(review): interpreted by the XML importer, which is not visible here.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum XmlAttrsMode {
/// The default mode.
#[default]
AsFields,
/// Presumably marks attribute-derived keys with a prefix — TODO confirm.
Prefix,
}
/// Conflict-resolution option carried by [`ImportArgs::conflict`].
///
/// NOTE(review): presumably governs what happens when one key is observed
/// with incompatible types during schema inference — confirm in `infer`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ConflictPolicy {
/// The default policy.
#[default]
Error,
/// Presumably falls back to a string type on conflict — TODO confirm.
CoerceString,
/// Presumably keeps the first observed type — TODO confirm.
FirstWins,
}
/// Schema handed to the per-format `emit` functions by [`run_import`].
///
/// Produced either by a format module's `infer_schema` or by
/// [`load_schema_hint`].
#[derive(Debug, Default)]
pub struct InferredSchema {
/// One entry per key in the schema.
pub keys: Vec<InferredKey>,
/// Inference state per key; left empty by [`load_schema_hint`].
pub key_states: Vec<crate::convert::infer::KeyState>,
/// Record counter; set to `0` by [`load_schema_hint`].
pub total_records: usize,
}
/// A single key of an [`InferredSchema`].
#[derive(Debug)]
pub struct InferredKey {
/// Key name.
pub name: String,
/// One-byte type marker. [`load_schema_hint`] takes it from the first byte
/// of the hint's `sigil` string.
pub sigil: u8,
/// Whether the key is flagged optional.
pub optional: bool,
/// Optional one-byte marker; presumably the element sigil when the key
/// holds a list — TODO confirm against the consumers.
pub list_of: Option<u8>,
}
/// Input/output locations shared by the import, export and inspect argument
/// structs.
#[derive(Debug, Default, Clone)]
pub struct CommonOpts {
/// Input file. [`run_import`] and [`run_export`] read stdin when `None`.
pub input_path: Option<std::path::PathBuf>,
/// Output file. [`run_import`] and [`run_export`] write stdout when `None`.
pub output_path: Option<std::path::PathBuf>,
}
/// Arguments for [`run_import`].
///
/// The whole struct is passed by reference to the selected format module's
/// `infer_schema` and `emit`. Fields not read in this file are interpreted
/// there; their descriptions below are hedged accordingly.
#[derive(Debug)]
pub struct ImportArgs {
/// Input and output locations.
pub common: CommonOpts,
/// Source format; selects which `*_in` module [`run_import`] dispatches to.
pub from: ImportFormat,
/// Path to a YAML schema hint. When set, [`run_import`] loads the schema
/// with [`load_schema_hint`] instead of inferring it from the input.
pub schema_hint: Option<std::path::PathBuf>,
/// Conflict policy (defaults to [`ConflictPolicy::default`]).
pub conflict: ConflictPolicy,
/// Presumably selects the sub-document that holds the records — TODO confirm.
pub root: Option<String>,
/// Presumably the CSV field separator override — TODO confirm in `csv_in`.
pub csv_delimiter: Option<char>,
/// Presumably marks CSV input as having no header row — TODO confirm.
pub csv_no_header: bool,
/// Presumably the XML element name treated as one record — TODO confirm.
pub xml_record_tag: Option<String>,
/// XML attribute handling mode.
pub xml_attrs: XmlAttrsMode,
/// Defaults to `4096`; presumably a record buffer size — TODO confirm.
pub buffer_records: usize,
/// Defaults to `64`; presumably a nesting limit — TODO confirm.
pub max_depth: usize,
/// Defaults to `64`; presumably the XML nesting limit — TODO confirm.
pub xml_max_depth: usize,
/// Defaults to `false`; meaning is defined by the emitters — TODO confirm.
pub tail_index_spill: bool,
/// Verification policy.
pub verify: VerifyPolicy,
}
impl Default for ImportArgs {
fn default() -> Self {
Self {
common: CommonOpts::default(),
from: ImportFormat::default(),
schema_hint: None,
conflict: ConflictPolicy::default(),
root: None,
csv_delimiter: None,
csv_no_header: false,
xml_record_tag: None,
xml_attrs: XmlAttrsMode::default(),
buffer_records: 4096,
max_depth: 64,
xml_max_depth: 64,
tail_index_spill: false,
verify: VerifyPolicy::default(),
}
}
}
/// Source format of an import; [`run_import`] dispatches on it.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum ImportFormat {
/// Handled by the `json_in` module (default).
#[default]
Json,
/// Handled by the `csv_in` module.
Csv,
/// Handled by the `xml_in` module.
Xml,
}
/// Summary returned by [`run_import`]; filled in by the format module's `emit`.
#[derive(Debug, Default)]
pub struct ImportReport {
/// Count of records written.
pub records_written: usize,
/// Count of output bytes.
pub output_bytes: usize,
}
/// Arguments for [`run_export`].
///
/// The whole struct is passed by reference to the selected `*_out::run`.
/// Fields not read in this file are interpreted there.
#[derive(Debug, Default)]
pub struct ExportArgs {
/// Input and output locations.
pub common: CommonOpts,
/// Target format; selects which `*_out` module [`run_export`] dispatches to.
pub to: ExportFormat,
/// Presumably enables pretty-printed output — TODO confirm in `json_out`.
pub pretty: bool,
/// Presumably selects newline-delimited JSON — TODO confirm in `json_out`.
pub ndjson: bool,
/// Presumably restricts/orders the exported columns — TODO confirm.
pub columns: Option<Vec<String>>,
/// Presumably the CSV field separator override — TODO confirm in `csv_out`.
pub csv_delimiter: Option<char>,
/// Binary-value handling option.
pub binary: BinaryEncoding,
/// Meaning is defined by the CSV exporter — TODO confirm in `csv_out`.
pub csv_safe: bool,
}
/// Target format of an export; [`run_export`] dispatches on it.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
/// Handled by the `json_out` module (default).
#[default]
Json,
/// Handled by the `csv_out` module.
Csv,
}
/// Summary returned by [`run_export`]; filled in by the selected `*_out::run`.
#[derive(Debug, Default)]
pub struct ExportReport {
/// Count of records read.
pub records_read: usize,
/// Count of output bytes.
pub output_bytes: usize,
}
/// Arguments for [`run_inspect`].
///
/// The whole struct is passed by reference to the `inspect` renderers.
#[derive(Debug, Default)]
pub struct InspectArgs {
/// Input and output locations. [`run_inspect`] itself always renders to
/// stdout; how the renderers use these paths is not visible here.
pub common: CommonOpts,
/// When `true`, [`run_inspect`] uses `inspect::render_json`; otherwise
/// `inspect::render_text`.
pub json_output: bool,
/// Presumably caps how many records are displayed — TODO confirm.
pub records_to_show: Option<usize>,
/// Presumably selects one record by index — TODO confirm.
pub record_index: Option<usize>,
/// Presumably requests a hash check (see [`InspectReport::dict_hash_ok`])
/// — TODO confirm in `inspect`.
pub verify_hash: bool,
}
/// Summary returned by [`run_inspect`]; filled in by the `inspect` renderers.
#[derive(Debug, Default)]
pub struct InspectReport {
/// Hash-check outcome; `None` presumably means the check was not run —
/// TODO confirm in `inspect`.
pub dict_hash_ok: Option<bool>,
/// Count of records.
pub record_count: usize,
}
/// On-disk shape of a schema hint, deserialized from YAML by
/// [`load_schema_hint`].
#[derive(Debug, serde::Deserialize)]
struct SchemaHintFile {
/// Key name mapped to its hint entry. Being a `HashMap`, it does not keep
/// the order in which keys appear in the file.
keys: std::collections::HashMap<String, SchemaHintKey>,
}
/// One key entry of a [`SchemaHintFile`].
#[derive(Debug, serde::Deserialize)]
struct SchemaHintKey {
/// Type marker as text. [`load_schema_hint`] uses only its first byte and
/// substitutes `"` when the string is empty.
sigil: String,
/// Optional flag; `false` when omitted from the file.
#[serde(default)]
optional: bool,
/// Optional marker as text. [`load_schema_hint`] uses only its first byte;
/// an absent or empty value becomes `None`.
list_of: Option<String>,
}
pub fn load_schema_hint(path: &std::path::Path) -> Result<InferredSchema> {
let text = std::fs::read_to_string(path)
.map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", path.display())))?;
let hint: SchemaHintFile = serde_yaml2::de::from_str(&text).map_err(|e| {
crate::error::NxsError::ConvertParseError {
offset: 0,
msg: format!("schema hint YAML parse error: {e}"),
}
})?;
let keys = hint
.keys
.into_iter()
.map(|(name, k)| {
let sigil = k.sigil.bytes().next().unwrap_or(b'"');
let list_of = k.list_of.as_deref().and_then(|s| s.bytes().next());
InferredKey {
name,
sigil,
optional: k.optional,
list_of,
}
})
.collect();
Ok(InferredSchema {
keys,
key_states: vec![],
total_records: 0,
})
}
pub fn exit_code_for(err: &crate::error::NxsError) -> i32 {
use crate::error::NxsError;
match err {
NxsError::ConvertSchemaConflict(_) => 4,
NxsError::ConvertParseError { .. }
| NxsError::ConvertEntityExpansion
| NxsError::ConvertDepthExceeded
| NxsError::BadMagic
| NxsError::OutOfBounds
| NxsError::RecursionLimit => 3,
NxsError::IoError(_) => 5,
_ => 1,
}
}
pub fn run_import(args: &ImportArgs) -> Result<ImportReport> {
use crate::convert::json_in;
use std::io::BufReader;
let input_path = args.common.input_path.as_deref();
let output_path = args.common.output_path.as_deref();
match args.from {
ImportFormat::Json => {
match input_path {
Some(path) => {
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
json_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
json_in::emit(BufReader::new(f2), out, &schema, args)
}
None => json_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
None => {
let mut spill = tempfile::NamedTempFile::new()
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
std::io::copy(&mut std::io::stdin(), &mut spill)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
let spill_path = spill.path().to_path_buf();
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
json_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
json_in::emit(BufReader::new(f2), out, &schema, args)
}
None => json_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
}
}
ImportFormat::Csv => {
use crate::convert::csv_in;
match input_path {
Some(path) => {
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
csv_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
csv_in::emit(BufReader::new(f2), out, &schema, args)
}
None => csv_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
None => {
let mut spill = tempfile::NamedTempFile::new()
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
std::io::copy(&mut std::io::stdin(), &mut spill)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
let spill_path = spill.path().to_path_buf();
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
csv_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
csv_in::emit(BufReader::new(f2), out, &schema, args)
}
None => csv_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
}
}
ImportFormat::Xml => {
use crate::convert::xml_in;
match input_path {
Some(path) => {
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
xml_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(path).map_err(|e| {
crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
})?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
xml_in::emit(BufReader::new(f2), out, &schema, args)
}
None => xml_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
None => {
let mut spill = tempfile::NamedTempFile::new()
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
std::io::copy(&mut std::io::stdin(), &mut spill)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
let spill_path = spill.path().to_path_buf();
let schema = if let Some(hint_path) = &args.schema_hint {
load_schema_hint(hint_path)?
} else {
let f1 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
xml_in::infer_schema(BufReader::new(f1), args)?
};
let f2 = std::fs::File::open(&spill_path)
.map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
match output_path {
Some(out_path) => {
let out = std::fs::File::create(out_path).map_err(|e| {
crate::error::NxsError::IoError(format!(
"{}: {e}",
out_path.display()
))
})?;
xml_in::emit(BufReader::new(f2), out, &schema, args)
}
None => xml_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
}
}
}
}
}
}
pub fn run_export(args: &ExportArgs) -> Result<ExportReport> {
use crate::convert::json_out;
let input_path = args.common.input_path.as_deref();
let output_path = args.common.output_path.as_deref();
macro_rules! open_input {
($path:expr) => {
std::fs::File::open($path)
.map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", $path.display())))
};
}
macro_rules! open_output {
($path:expr) => {
std::fs::File::create($path)
.map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", $path.display())))
};
}
match args.to {
ExportFormat::Json => match (input_path, output_path) {
(Some(inp), Some(out)) => json_out::run(open_input!(inp)?, open_output!(out)?, args),
(Some(inp), None) => json_out::run(open_input!(inp)?, std::io::stdout(), args),
(None, Some(out)) => json_out::run(std::io::stdin(), open_output!(out)?, args),
(None, None) => json_out::run(std::io::stdin(), std::io::stdout(), args),
},
ExportFormat::Csv => {
use crate::convert::csv_out;
match (input_path, output_path) {
(Some(inp), Some(out)) => csv_out::run(open_input!(inp)?, open_output!(out)?, args),
(Some(inp), None) => csv_out::run(open_input!(inp)?, std::io::stdout(), args),
(None, Some(out)) => csv_out::run(std::io::stdin(), open_output!(out)?, args),
(None, None) => csv_out::run(std::io::stdin(), std::io::stdout(), args),
}
}
}
}
pub fn run_inspect(args: &InspectArgs) -> Result<InspectReport> {
use crate::convert::inspect;
if args.json_output {
inspect::render_json(std::io::stdout(), args)
} else {
inspect::render_text(std::io::stdout(), args)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Touches every `ImportArgs` field listed in `spec_fields`, so removing
    /// or renaming one of them breaks compilation of this test.
    #[test]
    fn import_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &[
            "from",
            "schema_hint",
            "conflict",
            "root",
            "csv_delimiter",
            "csv_no_header",
            "xml_record_tag",
            "xml_attrs",
            "buffer_records",
            "max_depth",
            "xml_max_depth",
            "tail_index_spill",
            "verify",
        ];
        let a = ImportArgs::default();
        let _ = &a.from;
        let _ = &a.schema_hint;
        let _ = &a.conflict;
        let _ = &a.root;
        let _ = &a.csv_delimiter;
        let _ = &a.csv_no_header;
        let _ = &a.xml_record_tag;
        let _ = &a.xml_attrs;
        let _ = &a.buffer_records;
        let _ = &a.max_depth;
        let _ = &a.xml_max_depth;
        let _ = &a.tail_index_spill;
        let _ = &a.verify;
        assert_eq!(spec_fields.len(), 13, "spec has 13 import flags");
    }

    /// Touches every `ExportArgs` field listed in `spec_fields`, so removing
    /// or renaming one of them breaks compilation of this test.
    #[test]
    fn export_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &[
            "to",
            "pretty",
            "ndjson",
            "columns",
            "csv_delimiter",
            "binary",
            "csv_safe",
        ];
        let a = ExportArgs::default();
        let _ = &a.to;
        let _ = &a.pretty;
        let _ = &a.ndjson;
        let _ = &a.columns;
        let _ = &a.csv_delimiter;
        let _ = &a.binary;
        let _ = &a.csv_safe;
        assert_eq!(spec_fields.len(), 7, "spec has 7 export flags");
    }

    /// Touches every `InspectArgs` field listed in `spec_fields`, so removing
    /// or renaming one of them breaks compilation of this test.
    #[test]
    fn inspect_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &["json_output", "records_to_show", "verify_hash"];
        let a = InspectArgs::default();
        let _ = &a.json_output;
        let _ = &a.records_to_show;
        let _ = &a.verify_hash;
        assert_eq!(spec_fields.len(), 3, "spec has 3 inspect flags");
    }

    /// Pins the exit code of every variant that `exit_code_for` names
    /// explicitly.
    #[test]
    fn convert_errors_map_to_documented_exit_codes() {
        use crate::error::NxsError;
        assert_eq!(
            exit_code_for(&NxsError::ConvertSchemaConflict("x".into())),
            4
        );
        assert_eq!(
            exit_code_for(&NxsError::ConvertParseError {
                offset: 0,
                msg: "bad".into()
            }),
            3
        );
        assert_eq!(exit_code_for(&NxsError::ConvertEntityExpansion), 3);
        assert_eq!(exit_code_for(&NxsError::ConvertDepthExceeded), 3);
        assert_eq!(exit_code_for(&NxsError::IoError("disk full".into())), 5);
        assert_eq!(exit_code_for(&NxsError::BadMagic), 3);
        assert_eq!(exit_code_for(&NxsError::OutOfBounds), 3);
        assert_eq!(exit_code_for(&NxsError::RecursionLimit), 3);
    }

    /// Deriving an output name from the input's file stem must drop every
    /// directory component, including `..`.
    #[test]
    fn import_output_path_derivation_does_not_traverse() {
        let cases = &[
            ("../foo.json", "foo.nxb"),
            ("/tmp/foo.json", "foo.nxb"),
            ("foo.json", "foo.nxb"),
            ("./bar/baz.csv", "baz.nxb"),
        ];
        for (input, expected) in cases {
            // Kept as an `Option` instead of `.expect(..)`: this file denies
            // `clippy::expect_used`, and a missing stem still fails the
            // equality assertion below.
            let derived = std::path::Path::new(input)
                .file_name()
                .and_then(|n| std::path::Path::new(n).file_stem())
                .map(|stem| std::path::PathBuf::from(stem).with_extension("nxb"));
            assert_eq!(
                derived.as_deref().and_then(std::path::Path::to_str),
                Some(*expected),
                "input={input}"
            );
            assert!(
                !derived
                    .iter()
                    .flat_map(|d| d.components())
                    .any(|c| c == std::path::Component::ParentDir),
                "traversal in derived path for input={input}"
            );
        }
    }
}