1#![deny(
20 clippy::unwrap_used,
21 clippy::expect_used,
22 clippy::panic,
23 clippy::indexing_slicing
24)]
25
26use crate::error::Result;
27
28pub mod csv_in;
29pub mod csv_out;
30pub mod infer;
31pub mod inspect;
32pub mod json_in;
33pub mod json_out;
34pub mod xml_in;
35
/// Verification policy selected for an import (see `ImportArgs::verify`).
///
/// The default is [`VerifyPolicy::Auto`].
// NOTE(review): the precise meaning of each variant is decided by the code that
// consumes `ImportArgs::verify`, which is not part of this file — confirm there.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum VerifyPolicy {
    /// Default policy; presumably lets the importer decide whether to verify.
    #[default]
    Auto,
    /// Presumably always verify.
    Force,
    /// Presumably never verify.
    Off,
}
49
/// Binary-value handling selected for an export (see `ExportArgs::binary`).
///
/// The default is [`BinaryEncoding::Base64`].
// NOTE(review): how each variant is applied is up to the exporters
// (`json_out` / `csv_out`), which are not visible here — confirm there.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum BinaryEncoding {
    /// Default; presumably renders binary values as base64 text.
    #[default]
    Base64,
    /// Presumably renders binary values as hexadecimal text.
    Hex,
    /// Presumably omits binary values from the output.
    Skip,
}
60
/// XML attribute handling selected for an import (see `ImportArgs::xml_attrs`).
///
/// The default is [`XmlAttrsMode::AsFields`].
// NOTE(review): the exact mapping is implemented in `xml_in`, not visible here.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum XmlAttrsMode {
    /// Default; presumably attributes become ordinary record fields.
    #[default]
    AsFields,
    /// Presumably attribute-derived field names get a distinguishing prefix.
    Prefix,
}
72
/// Conflict handling selected for an import (see `ImportArgs::conflict`).
///
/// The default is [`ConflictPolicy::Error`].
// NOTE(review): what counts as a conflict and how each variant resolves it is
// decided by the inference code, which is not visible here — confirm there.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ConflictPolicy {
    /// Default; presumably a conflict aborts the import with an error.
    #[default]
    Error,
    /// Presumably conflicting values are coerced to strings.
    CoerceString,
    /// Presumably the first observed type is kept.
    FirstWins,
}
86
87#[derive(Debug, Default)]
95pub struct InferredSchema {
96 pub keys: Vec<InferredKey>,
97 pub key_states: Vec<crate::convert::infer::KeyState>,
99 pub total_records: usize,
101}
102
/// A single key of an [`InferredSchema`].
#[derive(Debug)]
pub struct InferredKey {
    /// Key name.
    pub name: String,
    /// One-byte type sigil. `load_schema_hint` takes the first byte of the hint's
    /// `sigil` string and falls back to `b'"'` when that string is empty.
    pub sigil: u8,
    /// Optional flag; presumably marks keys that may be absent from a record.
    pub optional: bool,
    /// Optional one-byte sigil; presumably the element type when the key holds a list.
    pub list_of: Option<u8>,
}
112
/// Input/output locations shared by the import, export and inspect argument structs.
#[derive(Clone, Debug, Default)]
pub struct CommonOpts {
    /// Input file. `run_import` and `run_export` read standard input when this is `None`.
    pub input_path: Option<std::path::PathBuf>,
    /// Output file. `run_import` and `run_export` write to standard output when this is `None`.
    pub output_path: Option<std::path::PathBuf>,
}
123
124#[derive(Debug)]
128pub struct ImportArgs {
129 pub common: CommonOpts,
130 pub from: ImportFormat,
132 pub schema_hint: Option<std::path::PathBuf>,
134 pub conflict: ConflictPolicy,
136 pub root: Option<String>,
138 pub csv_delimiter: Option<char>,
140 pub csv_no_header: bool,
142 pub xml_record_tag: Option<String>,
144 pub xml_attrs: XmlAttrsMode,
146 pub buffer_records: usize,
148 pub max_depth: usize,
150 pub xml_max_depth: usize,
152 pub tail_index_spill: bool,
154 pub verify: VerifyPolicy,
156}
157
158impl Default for ImportArgs {
159 fn default() -> Self {
160 Self {
161 common: CommonOpts::default(),
162 from: ImportFormat::default(),
163 schema_hint: None,
164 conflict: ConflictPolicy::default(),
165 root: None,
166 csv_delimiter: None,
167 csv_no_header: false,
168 xml_record_tag: None,
169 xml_attrs: XmlAttrsMode::default(),
170 buffer_records: 4096,
171 max_depth: 64,
172 xml_max_depth: 64,
173 tail_index_spill: false,
174 verify: VerifyPolicy::default(),
175 }
176 }
177}
178
/// Input formats understood by [`run_import`]. The default is [`ImportFormat::Json`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ImportFormat {
    /// Handled by the `json_in` module.
    #[default]
    Json,
    /// Handled by the `csv_in` module.
    Csv,
    /// Handled by the `xml_in` module.
    Xml,
}
187
/// Summary returned by [`run_import`]; the values are filled in by the format
/// module's `emit` function.
#[derive(Default, Debug)]
pub struct ImportReport {
    /// Number of records written.
    pub records_written: usize,
    /// Number of output bytes.
    pub output_bytes: usize,
}
194
195#[derive(Debug, Default)]
199pub struct ExportArgs {
200 pub common: CommonOpts,
201 pub to: ExportFormat,
203 pub pretty: bool,
205 pub ndjson: bool,
207 pub columns: Option<Vec<String>>,
209 pub csv_delimiter: Option<char>,
211 pub binary: BinaryEncoding,
213 pub csv_safe: bool,
215}
216
/// Output formats produced by [`run_export`]. The default is [`ExportFormat::Json`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ExportFormat {
    /// Handled by the `json_out` module.
    #[default]
    Json,
    /// Handled by the `csv_out` module.
    Csv,
}
224
/// Summary returned by [`run_export`]; the values are filled in by the exporter's
/// `run` function.
#[derive(Default, Debug)]
pub struct ExportReport {
    /// Number of records read.
    pub records_read: usize,
    /// Number of output bytes.
    pub output_bytes: usize,
}
231
232#[derive(Debug, Default)]
236pub struct InspectArgs {
237 pub common: CommonOpts,
238 pub json_output: bool,
240 pub records_to_show: Option<usize>,
242 pub record_index: Option<usize>,
245 pub verify_hash: bool,
247}
248
/// Summary returned by [`run_inspect`]; the values are filled in by the
/// `inspect` renderers.
#[derive(Default, Debug)]
pub struct InspectReport {
    /// Outcome of the dictionary hash check; presumably `None` when it was not run.
    pub dict_hash_ok: Option<bool>,
    /// Number of records.
    pub record_count: usize,
}
256
257#[derive(Debug, serde::Deserialize)]
262struct SchemaHintFile {
263 keys: std::collections::HashMap<String, SchemaHintKey>,
264}
265
266#[derive(Debug, serde::Deserialize)]
267struct SchemaHintKey {
268 sigil: String,
269 #[serde(default)]
270 optional: bool,
271 list_of: Option<String>,
272}
273
274pub fn load_schema_hint(path: &std::path::Path) -> Result<InferredSchema> {
277 let text = std::fs::read_to_string(path)
278 .map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", path.display())))?;
279 let hint: SchemaHintFile = serde_yaml2::de::from_str(&text).map_err(|e| {
280 crate::error::NxsError::ConvertParseError {
281 offset: 0,
282 msg: format!("schema hint YAML parse error: {e}"),
283 }
284 })?;
285
286 let keys = hint
287 .keys
288 .into_iter()
289 .map(|(name, k)| {
290 let sigil = k.sigil.bytes().next().unwrap_or(b'"');
291 let list_of = k.list_of.as_deref().and_then(|s| s.bytes().next());
292 InferredKey {
293 name,
294 sigil,
295 optional: k.optional,
296 list_of,
297 }
298 })
299 .collect();
300
301 Ok(InferredSchema {
302 keys,
303 key_states: vec![],
304 total_records: 0,
305 })
306}
307
308pub fn exit_code_for(err: &crate::error::NxsError) -> i32 {
320 use crate::error::NxsError;
321 match err {
322 NxsError::ConvertSchemaConflict(_) => 4,
323 NxsError::ConvertParseError { .. }
324 | NxsError::ConvertEntityExpansion
325 | NxsError::ConvertDepthExceeded
326 | NxsError::BadMagic
327 | NxsError::OutOfBounds
328 | NxsError::RecursionLimit => 3,
329 NxsError::IoError(_) => 5,
330 _ => 1,
331 }
332}
333
334pub fn run_import(args: &ImportArgs) -> Result<ImportReport> {
338 use crate::convert::json_in;
339 use std::io::BufReader;
340
341 let input_path = args.common.input_path.as_deref();
342 let output_path = args.common.output_path.as_deref();
343
344 match args.from {
345 ImportFormat::Json => {
346 match input_path {
348 Some(path) => {
349 let schema = if let Some(hint_path) = &args.schema_hint {
351 load_schema_hint(hint_path)?
352 } else {
353 let f1 = std::fs::File::open(path).map_err(|e| {
354 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
355 })?;
356 json_in::infer_schema(BufReader::new(f1), args)?
357 };
358
359 let f2 = std::fs::File::open(path).map_err(|e| {
361 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
362 })?;
363
364 match output_path {
365 Some(out_path) => {
366 let out = std::fs::File::create(out_path).map_err(|e| {
367 crate::error::NxsError::IoError(format!(
368 "{}: {e}",
369 out_path.display()
370 ))
371 })?;
372 json_in::emit(BufReader::new(f2), out, &schema, args)
373 }
374 None => json_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
375 }
376 }
377 None => {
378 let mut spill = tempfile::NamedTempFile::new()
381 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
382 std::io::copy(&mut std::io::stdin(), &mut spill)
383 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
384 let spill_path = spill.path().to_path_buf();
385
386 let schema = if let Some(hint_path) = &args.schema_hint {
387 load_schema_hint(hint_path)?
388 } else {
389 let f1 = std::fs::File::open(&spill_path)
390 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
391 json_in::infer_schema(BufReader::new(f1), args)?
392 };
393
394 let f2 = std::fs::File::open(&spill_path)
395 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
396 match output_path {
397 Some(out_path) => {
398 let out = std::fs::File::create(out_path).map_err(|e| {
399 crate::error::NxsError::IoError(format!(
400 "{}: {e}",
401 out_path.display()
402 ))
403 })?;
404 json_in::emit(BufReader::new(f2), out, &schema, args)
405 }
406 None => json_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
407 }
408 }
410 }
411 }
412 ImportFormat::Csv => {
413 use crate::convert::csv_in;
414 match input_path {
415 Some(path) => {
416 let schema = if let Some(hint_path) = &args.schema_hint {
417 load_schema_hint(hint_path)?
418 } else {
419 let f1 = std::fs::File::open(path).map_err(|e| {
420 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
421 })?;
422 csv_in::infer_schema(BufReader::new(f1), args)?
423 };
424 let f2 = std::fs::File::open(path).map_err(|e| {
425 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
426 })?;
427 match output_path {
428 Some(out_path) => {
429 let out = std::fs::File::create(out_path).map_err(|e| {
430 crate::error::NxsError::IoError(format!(
431 "{}: {e}",
432 out_path.display()
433 ))
434 })?;
435 csv_in::emit(BufReader::new(f2), out, &schema, args)
436 }
437 None => csv_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
438 }
439 }
440 None => {
441 let mut spill = tempfile::NamedTempFile::new()
442 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
443 std::io::copy(&mut std::io::stdin(), &mut spill)
444 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
445 let spill_path = spill.path().to_path_buf();
446 let schema = if let Some(hint_path) = &args.schema_hint {
447 load_schema_hint(hint_path)?
448 } else {
449 let f1 = std::fs::File::open(&spill_path)
450 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
451 csv_in::infer_schema(BufReader::new(f1), args)?
452 };
453 let f2 = std::fs::File::open(&spill_path)
454 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
455 match output_path {
456 Some(out_path) => {
457 let out = std::fs::File::create(out_path).map_err(|e| {
458 crate::error::NxsError::IoError(format!(
459 "{}: {e}",
460 out_path.display()
461 ))
462 })?;
463 csv_in::emit(BufReader::new(f2), out, &schema, args)
464 }
465 None => csv_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
466 }
467 }
468 }
469 }
470 ImportFormat::Xml => {
471 use crate::convert::xml_in;
472 match input_path {
473 Some(path) => {
474 let schema = if let Some(hint_path) = &args.schema_hint {
475 load_schema_hint(hint_path)?
476 } else {
477 let f1 = std::fs::File::open(path).map_err(|e| {
478 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
479 })?;
480 xml_in::infer_schema(BufReader::new(f1), args)?
481 };
482 let f2 = std::fs::File::open(path).map_err(|e| {
483 crate::error::NxsError::IoError(format!("{}: {e}", path.display()))
484 })?;
485 match output_path {
486 Some(out_path) => {
487 let out = std::fs::File::create(out_path).map_err(|e| {
488 crate::error::NxsError::IoError(format!(
489 "{}: {e}",
490 out_path.display()
491 ))
492 })?;
493 xml_in::emit(BufReader::new(f2), out, &schema, args)
494 }
495 None => xml_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
496 }
497 }
498 None => {
499 let mut spill = tempfile::NamedTempFile::new()
500 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
501 std::io::copy(&mut std::io::stdin(), &mut spill)
502 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
503 let spill_path = spill.path().to_path_buf();
504 let schema = if let Some(hint_path) = &args.schema_hint {
505 load_schema_hint(hint_path)?
506 } else {
507 let f1 = std::fs::File::open(&spill_path)
508 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
509 xml_in::infer_schema(BufReader::new(f1), args)?
510 };
511 let f2 = std::fs::File::open(&spill_path)
512 .map_err(|e| crate::error::NxsError::IoError(e.to_string()))?;
513 match output_path {
514 Some(out_path) => {
515 let out = std::fs::File::create(out_path).map_err(|e| {
516 crate::error::NxsError::IoError(format!(
517 "{}: {e}",
518 out_path.display()
519 ))
520 })?;
521 xml_in::emit(BufReader::new(f2), out, &schema, args)
522 }
523 None => xml_in::emit(BufReader::new(f2), std::io::stdout(), &schema, args),
524 }
525 }
526 }
527 }
528 }
529}
530
531pub fn run_export(args: &ExportArgs) -> Result<ExportReport> {
533 use crate::convert::json_out;
534
535 let input_path = args.common.input_path.as_deref();
536 let output_path = args.common.output_path.as_deref();
537
538 macro_rules! open_input {
541 ($path:expr) => {
542 std::fs::File::open($path)
543 .map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", $path.display())))
544 };
545 }
546
547 macro_rules! open_output {
548 ($path:expr) => {
549 std::fs::File::create($path)
550 .map_err(|e| crate::error::NxsError::IoError(format!("{}: {e}", $path.display())))
551 };
552 }
553
554 match args.to {
555 ExportFormat::Json => match (input_path, output_path) {
556 (Some(inp), Some(out)) => json_out::run(open_input!(inp)?, open_output!(out)?, args),
557 (Some(inp), None) => json_out::run(open_input!(inp)?, std::io::stdout(), args),
558 (None, Some(out)) => json_out::run(std::io::stdin(), open_output!(out)?, args),
559 (None, None) => json_out::run(std::io::stdin(), std::io::stdout(), args),
560 },
561 ExportFormat::Csv => {
562 use crate::convert::csv_out;
563 match (input_path, output_path) {
564 (Some(inp), Some(out)) => csv_out::run(open_input!(inp)?, open_output!(out)?, args),
565 (Some(inp), None) => csv_out::run(open_input!(inp)?, std::io::stdout(), args),
566 (None, Some(out)) => csv_out::run(std::io::stdin(), open_output!(out)?, args),
567 (None, None) => csv_out::run(std::io::stdin(), std::io::stdout(), args),
568 }
569 }
570 }
571}
572
573pub fn run_inspect(args: &InspectArgs) -> Result<InspectReport> {
575 use crate::convert::inspect;
576 if args.json_output {
577 inspect::render_json(std::io::stdout(), args)
578 } else {
579 inspect::render_text(std::io::stdout(), args)
580 }
581}
582
#[cfg(test)]
mod tests {
    use super::*;

    /// Touches every `ImportArgs` field that corresponds to a spec flag, so
    /// removing or renaming one breaks compilation of this test.
    #[test]
    fn import_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &[
            "from",
            "schema_hint",
            "conflict",
            "root",
            "csv_delimiter",
            "csv_no_header",
            "xml_record_tag",
            "xml_attrs",
            "buffer_records",
            "max_depth",
            "xml_max_depth",
            "tail_index_spill",
            "verify",
        ];
        let a = ImportArgs::default();
        let _ = &a.from;
        let _ = &a.schema_hint;
        let _ = &a.conflict;
        let _ = &a.root;
        let _ = &a.csv_delimiter;
        let _ = &a.csv_no_header;
        let _ = &a.xml_record_tag;
        let _ = &a.xml_attrs;
        let _ = &a.buffer_records;
        let _ = &a.max_depth;
        let _ = &a.xml_max_depth;
        let _ = &a.tail_index_spill;
        let _ = &a.verify;
        assert_eq!(spec_fields.len(), 13, "spec has 13 import flags");
    }

    /// Same compile-time field check for `ExportArgs`.
    #[test]
    fn export_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &[
            "to",
            "pretty",
            "ndjson",
            "columns",
            "csv_delimiter",
            "binary",
            "csv_safe",
        ];
        let a = ExportArgs::default();
        let _ = &a.to;
        let _ = &a.pretty;
        let _ = &a.ndjson;
        let _ = &a.columns;
        let _ = &a.csv_delimiter;
        let _ = &a.binary;
        let _ = &a.csv_safe;
        assert_eq!(spec_fields.len(), 7, "spec has 7 export flags");
    }

    /// Same compile-time field check for `InspectArgs`.
    #[test]
    fn inspect_args_maps_every_spec_flag() {
        let spec_fields: &[&str] = &["json_output", "records_to_show", "verify_hash"];
        let a = InspectArgs::default();
        let _ = &a.json_output;
        let _ = &a.records_to_show;
        let _ = &a.verify_hash;
        assert_eq!(spec_fields.len(), 3, "spec has 3 inspect flags");
    }

    /// Pins the exit code returned by `exit_code_for` for the conversion errors.
    #[test]
    fn convert_errors_map_to_documented_exit_codes() {
        use crate::error::NxsError;
        assert_eq!(
            exit_code_for(&NxsError::ConvertSchemaConflict("x".into())),
            4
        );
        assert_eq!(
            exit_code_for(&NxsError::ConvertParseError {
                offset: 0,
                msg: "bad".into()
            }),
            3
        );
        assert_eq!(exit_code_for(&NxsError::ConvertEntityExpansion), 3);
        assert_eq!(exit_code_for(&NxsError::ConvertDepthExceeded), 3);
        assert_eq!(exit_code_for(&NxsError::IoError("disk full".into())), 5);
        assert_eq!(exit_code_for(&NxsError::BadMagic), 3);
    }

    /// Deriving an output name from the input's file stem must drop every
    /// directory component, so the result can never contain `..`.
    #[test]
    fn import_output_path_derivation_does_not_traverse() {
        let cases = &[
            ("../foo.json", "foo.nxb"),
            ("/tmp/foo.json", "foo.nxb"),
            ("foo.json", "foo.nxb"),
            ("./bar/baz.csv", "baz.nxb"),
        ];
        for (input, expected) in cases {
            let p = std::path::Path::new(input);
            let stem = p
                .file_name()
                .and_then(|n| std::path::Path::new(n).file_stem());
            // `assert!` rather than `.expect(..)`: the module-level
            // `deny(clippy::expect_used)` applies to test code as well.
            assert!(stem.is_some(), "no file stem for input={input}");
            let derived = std::path::PathBuf::from(stem.unwrap_or_default()).with_extension("nxb");
            assert_eq!(derived.to_str().unwrap_or(""), *expected, "input={input}");
            assert!(
                !derived
                    .components()
                    .any(|c| c == std::path::Component::ParentDir),
                "traversal in derived path for input={input}"
            );
        }
    }
}