1pub mod csv;
3pub mod env;
5pub mod html;
7pub mod json;
9pub mod jsonl;
11pub mod markdown;
13pub mod toml;
15pub mod yaml;
17
18#[cfg(feature = "msgpack")]
22pub mod msgpack;
23#[cfg(not(feature = "msgpack"))]
24pub mod msgpack {
25 use super::{FormatReader, FormatWriter};
27 use crate::value::Value;
28 use std::io::{Read, Write};
29
30 const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n Install with: cargo install dkit --features msgpack";
31
32 pub struct MsgpackReader;
33 impl MsgpackReader {
34 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
35 anyhow::bail!(MSG)
36 }
37 }
38 impl FormatReader for MsgpackReader {
39 fn read(&self, _: &str) -> anyhow::Result<Value> {
40 anyhow::bail!(MSG)
41 }
42 fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
43 anyhow::bail!(MSG)
44 }
45 }
46 pub struct MsgpackWriter;
47 impl MsgpackWriter {
48 pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
49 anyhow::bail!(MSG)
50 }
51 }
52 impl FormatWriter for MsgpackWriter {
53 fn write(&self, _: &Value) -> anyhow::Result<String> {
54 anyhow::bail!(MSG)
55 }
56 fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
57 anyhow::bail!(MSG)
58 }
59 }
60}
61
62#[cfg(feature = "parquet")]
64pub mod parquet;
65#[cfg(not(feature = "parquet"))]
66pub mod parquet {
67 use crate::value::Value;
69
70 const MSG: &str = "Parquet support requires the 'parquet' feature.\n Install with: cargo install dkit --features parquet";
71
72 #[derive(Debug, Clone, Default)]
73 pub struct ParquetOptions {
74 pub row_group: Option<usize>,
75 }
76 pub struct ParquetReader {
77 _options: ParquetOptions,
78 }
79 impl ParquetReader {
80 pub fn new(options: ParquetOptions) -> Self {
81 Self { _options: options }
82 }
83 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
84 anyhow::bail!(MSG)
85 }
86 #[allow(dead_code)]
87 pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
88 anyhow::bail!(MSG)
89 }
90 }
91 #[allow(dead_code)]
92 pub struct ParquetMetadata {
93 pub num_rows: usize,
94 pub num_row_groups: usize,
95 pub columns: Vec<String>,
96 pub column_types: Vec<String>,
97 }
98 #[derive(Debug, Clone, Default)]
99 pub enum ParquetCompression {
100 #[default]
101 None,
102 Snappy,
103 Gzip,
104 Zstd,
105 }
106 impl std::str::FromStr for ParquetCompression {
107 type Err = anyhow::Error;
108 fn from_str(s: &str) -> anyhow::Result<Self> {
109 match s.to_lowercase().as_str() {
110 "none" | "uncompressed" => Ok(Self::None),
111 "snappy" => Ok(Self::Snappy),
112 "gzip" => Ok(Self::Gzip),
113 "zstd" => Ok(Self::Zstd),
114 _ => anyhow::bail!(
115 "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
116 s
117 ),
118 }
119 }
120 }
121 #[derive(Debug, Clone, Default)]
122 pub struct ParquetWriteOptions {
123 pub compression: ParquetCompression,
124 pub row_group_size: Option<usize>,
125 }
126 pub struct ParquetWriter {
127 _options: ParquetWriteOptions,
128 }
129 impl ParquetWriter {
130 pub fn new(options: ParquetWriteOptions) -> Self {
131 Self { _options: options }
132 }
133 pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
134 anyhow::bail!(MSG)
135 }
136 }
137 pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
139 Value::Null
140 }
141}
142
143#[cfg(feature = "sqlite")]
145pub mod sqlite;
146#[cfg(not(feature = "sqlite"))]
147pub mod sqlite {
148 use crate::value::Value;
150 use std::path::Path;
151
152 const MSG: &str = "SQLite support requires the 'sqlite' feature.\n Install with: cargo install dkit --features sqlite";
153
154 #[derive(Debug, Clone, Default)]
155 pub struct SqliteOptions {
156 pub table: Option<String>,
157 pub sql: Option<String>,
158 }
159 pub struct SqliteReader {
160 _options: SqliteOptions,
161 }
162 impl SqliteReader {
163 pub fn new(options: SqliteOptions) -> Self {
164 Self { _options: options }
165 }
166 pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
167 anyhow::bail!(MSG)
168 }
169 pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
170 anyhow::bail!(MSG)
171 }
172 }
173}
174
175#[cfg(feature = "excel")]
177pub mod xlsx;
178#[cfg(not(feature = "excel"))]
179pub mod xlsx {
180 use crate::value::Value;
182
183 const MSG: &str = "Excel support requires the 'excel' feature.\n Install with: cargo install dkit --features excel";
184
185 #[derive(Debug, Clone, Default)]
186 pub struct XlsxOptions {
187 pub sheet: Option<String>,
188 pub header_row: usize,
189 }
190 pub struct XlsxReader {
191 _options: XlsxOptions,
192 }
193 impl XlsxReader {
194 pub fn new(options: XlsxOptions) -> Self {
195 Self { _options: options }
196 }
197 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
198 anyhow::bail!(MSG)
199 }
200 pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
201 anyhow::bail!(MSG)
202 }
203 }
204}
205
206#[cfg(feature = "xml")]
208pub mod xml;
209#[cfg(not(feature = "xml"))]
210pub mod xml {
211 use super::{FormatReader, FormatWriter};
213 use crate::value::Value;
214 use std::io::{Read, Write};
215
216 const MSG: &str = "XML support requires the 'xml' feature.\n Install with: cargo install dkit --features xml";
217
218 #[derive(Default)]
219 pub struct XmlReader {
220 _private: (),
221 }
222 impl XmlReader {
223 #[allow(dead_code)]
224 pub fn new(_strip_namespaces: bool) -> Self {
225 Self { _private: () }
226 }
227 }
228 impl FormatReader for XmlReader {
229 fn read(&self, _: &str) -> anyhow::Result<Value> {
230 anyhow::bail!(MSG)
231 }
232 fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
233 anyhow::bail!(MSG)
234 }
235 }
236 pub struct XmlWriter {
237 _private: (),
238 }
239 impl XmlWriter {
240 pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
241 Self { _private: () }
242 }
243 }
244 impl FormatWriter for XmlWriter {
245 fn write(&self, _: &Value) -> anyhow::Result<String> {
246 anyhow::bail!(MSG)
247 }
248 fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
249 anyhow::bail!(MSG)
250 }
251 }
252}
253
254use std::io::{Read, Write};
255use std::path::Path;
256
257use crate::error::DkitError;
258use crate::value::Value;
259
/// Data formats known to the tool.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so a `Format` can be
/// used as a `HashMap`/`HashSet` key; equality on a fieldless enum is total.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON.
    Json,
    /// JSON Lines (one JSON object per line).
    Jsonl,
    /// Delimited text: comma-separated values and the tab-separated variant.
    Csv,
    /// YAML.
    Yaml,
    /// TOML.
    Toml,
    /// XML.
    Xml,
    /// MessagePack binary format.
    Msgpack,
    /// Excel spreadsheet.
    Xlsx,
    /// SQLite database.
    Sqlite,
    /// Apache Parquet columnar format.
    Parquet,
    /// Markdown table.
    Markdown,
    /// HTML table.
    Html,
    /// Terminal table.
    Table,
    /// Environment variables (`.env`) format.
    Env,
}
296
297impl Format {
298 #[allow(clippy::should_implement_trait)]
299 pub fn from_str(s: &str) -> Result<Self, DkitError> {
300 match s.to_lowercase().as_str() {
301 "json" => Ok(Format::Json),
302 "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
303 "csv" | "tsv" => Ok(Format::Csv),
304 "yaml" | "yml" => Ok(Format::Yaml),
305 "toml" => Ok(Format::Toml),
306 "xml" => Ok(Format::Xml),
307 "msgpack" | "messagepack" => Ok(Format::Msgpack),
308 "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
309 "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
310 "parquet" | "pq" => Ok(Format::Parquet),
311 "md" | "markdown" => Ok(Format::Markdown),
312 "html" => Ok(Format::Html),
313 "table" => Ok(Format::Table),
314 "env" | "dotenv" => Ok(Format::Env),
315 _ => Err(DkitError::UnknownFormat(s.to_string())),
316 }
317 }
318
319 pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
321 let mut formats = vec![
322 ("json", "JSON format"),
323 ("csv", "Comma-separated values"),
324 ("tsv", "Tab-separated values (CSV variant)"),
325 ("yaml", "YAML format"),
326 ("toml", "TOML format"),
327 ("jsonl", "JSON Lines (one JSON object per line)"),
328 ];
329
330 if cfg!(feature = "xml") {
331 formats.push(("xml", "XML format"));
332 } else {
333 formats.push(("xml", "XML format (requires --features xml)"));
334 }
335 if cfg!(feature = "msgpack") {
336 formats.push(("msgpack", "MessagePack binary format"));
337 } else {
338 formats.push((
339 "msgpack",
340 "MessagePack binary format (requires --features msgpack)",
341 ));
342 }
343 if cfg!(feature = "excel") {
344 formats.push(("xlsx", "Excel spreadsheet (input only)"));
345 } else {
346 formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
347 }
348 if cfg!(feature = "sqlite") {
349 formats.push(("sqlite", "SQLite database (input only)"));
350 } else {
351 formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
352 }
353 if cfg!(feature = "parquet") {
354 formats.push(("parquet", "Apache Parquet columnar format"));
355 } else {
356 formats.push((
357 "parquet",
358 "Apache Parquet columnar format (requires --features parquet)",
359 ));
360 }
361
362 formats.push(("env", "Environment variables (.env) format"));
363 formats.push(("md", "Markdown table"));
364 formats.push(("html", "HTML table"));
365 formats.push(("table", "Terminal table (default for view)"));
366
367 formats
368 }
369}
370
371impl std::fmt::Display for Format {
372 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
373 match self {
374 Format::Json => write!(f, "JSON"),
375 Format::Jsonl => write!(f, "JSONL"),
376 Format::Csv => write!(f, "CSV"),
377 Format::Yaml => write!(f, "YAML"),
378 Format::Toml => write!(f, "TOML"),
379 Format::Xml => write!(f, "XML"),
380 Format::Msgpack => write!(f, "MessagePack"),
381 Format::Xlsx => write!(f, "Excel"),
382 Format::Sqlite => write!(f, "SQLite"),
383 Format::Parquet => write!(f, "Parquet"),
384 Format::Markdown => write!(f, "Markdown"),
385 Format::Html => write!(f, "HTML"),
386 Format::Table => write!(f, "Table"),
387 Format::Env => write!(f, "ENV"),
388 }
389 }
390}
391
392pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
394 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
396 if name == ".env" || name.starts_with(".env.") {
397 return Ok(Format::Env);
398 }
399 }
400
401 match path.extension().and_then(|e| e.to_str()) {
402 Some("json") => Ok(Format::Json),
403 Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
404 Some("csv" | "tsv") => Ok(Format::Csv),
405 Some("yaml" | "yml") => Ok(Format::Yaml),
406 Some("toml") => Ok(Format::Toml),
407 Some("xml") => Ok(Format::Xml),
408 Some("msgpack") => Ok(Format::Msgpack),
409 Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
410 Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
411 Some("parquet" | "pq") => Ok(Format::Parquet),
412 Some("md") => Ok(Format::Markdown),
413 Some("html") => Ok(Format::Html),
414 Some("env") => Ok(Format::Env),
415 Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
416 None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
417 }
418}
419
420pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
430 let trimmed = content.trim_start();
431
432 if trimmed.is_empty() {
433 return Err(DkitError::FormatDetectionFailed(
434 "input is empty".to_string(),
435 ));
436 }
437
438 if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
440 return Ok((Format::Xml, None));
441 }
442
443 let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
445 if let Some(first_line) = lines.next() {
446 if let Some(second_line) = lines.next() {
447 let first_trimmed = first_line.trim();
448 let second_trimmed = second_line.trim();
449 if first_trimmed.starts_with('{')
450 && first_trimmed.ends_with('}')
451 && second_trimmed.starts_with('{')
452 && second_trimmed.ends_with('}')
453 {
454 return Ok((Format::Jsonl, None));
455 }
456 }
457 }
458
459 if trimmed.starts_with('{') {
461 return Ok((Format::Json, None));
462 }
463
464 if trimmed.starts_with('[') {
468 let first_line = trimmed.lines().next().unwrap_or("").trim();
469 let is_toml_section = first_line.starts_with("[[")
471 || (first_line.starts_with('[')
472 && first_line.ends_with(']')
473 && !first_line.contains(',')
474 && first_line[1..first_line.len() - 1].chars().all(|c| {
475 c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
476 }));
477 if is_toml_section {
478 return Ok((Format::Toml, None));
479 }
480 return Ok((Format::Json, None));
481 }
482
483 if trimmed.starts_with('<') {
485 return Ok((Format::Xml, None));
486 }
487
488 if let Some(first_line) = trimmed.lines().next() {
490 if first_line.contains('\t') {
491 return Ok((Format::Csv, Some('\t')));
492 }
493 }
494
495 let first_line = trimmed.lines().next().unwrap_or("");
498 let ft = first_line.trim();
499 let env_line = ft.strip_prefix("export ").unwrap_or(ft);
500 if let Some(eq_pos) = env_line.find('=') {
501 let key_part = env_line[..eq_pos].trim();
502 if !key_part.is_empty()
503 && !key_part.contains(' ')
504 && key_part
505 .chars()
506 .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
507 {
508 let env_lines = trimmed
510 .lines()
511 .filter(|l| {
512 let t = l.trim();
513 !t.is_empty() && !t.starts_with('#')
514 })
515 .take(5);
516 let all_env = env_lines.clone().all(|l| {
517 let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
518 if let Some(p) = l.find('=') {
519 let k = l[..p].trim();
520 !k.is_empty()
521 && !k.contains(' ')
522 && k.chars()
523 .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
524 } else {
525 false
526 }
527 });
528 if all_env {
529 return Ok((Format::Env, None));
530 }
531 }
532 }
533
534 if ft.contains(" = ") {
536 return Ok((Format::Toml, None));
537 }
538
539 if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
541 return Ok((Format::Yaml, None));
542 }
543
544 if ft.contains(',') {
546 return Ok((Format::Csv, None));
547 }
548
549 Err(DkitError::FormatDetectionFailed(
550 "could not determine format from content".to_string(),
551 ))
552}
553
/// Returns the delimiter implied by a file's extension.
///
/// A `tsv` extension (compared without regard to ASCII case, so `.TSV` also
/// matches) yields `Some('\t')`. Every other path, including one without an
/// extension or with a non-UTF-8 extension, yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension()?.to_str()?;
    if ext.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
562
/// Returns the delimiter implied by a format name.
///
/// Only `tsv` (in any letter case) implies a delimiter, namely a tab; every
/// other name yields `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    if format_str.to_lowercase() == "tsv" {
        Some('\t')
    } else {
        None
    }
}
570
/// Options bundle passed to format readers and writers.
///
/// NOTE(review): which backend honours which field is not visible in this
/// file; the per-field notes below are inferred from the names — confirm
/// against the individual format modules.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// Explicit field delimiter; `None` when no override is set.
    pub delimiter: Option<char>,
    /// Presumably marks delimited input/output as having no header row.
    pub no_header: bool,
    /// Pretty-print flag. This is the only option enabled by default.
    pub pretty: bool,
    /// Presumably requests compact output.
    pub compact: bool,
    /// Presumably requests flow-style output (likely YAML — confirm).
    pub flow_style: bool,
    /// Optional root element name (likely for XML output — confirm).
    pub root_element: Option<String>,
    /// Presumably enables styling of generated output (likely HTML — confirm).
    pub styled: bool,
    /// Presumably requests a full HTML document rather than a fragment.
    pub full_html: bool,
}

impl Default for FormatOptions {
    /// Everything is unset or disabled except `pretty`, which is `true`.
    fn default() -> Self {
        let off = false;
        FormatOptions {
            pretty: true,
            delimiter: None,
            root_element: None,
            no_header: off,
            compact: off,
            flow_style: off,
            styled: off,
            full_html: off,
        }
    }
}
608
/// Interface implemented by format backends that produce a [`Value`] from
/// textual input.
///
/// NOTE(review): the reason for `#[allow(dead_code)]` is not visible here;
/// presumably some methods go unused under certain feature combinations —
/// confirm before removing it.
#[allow(dead_code)]
pub trait FormatReader {
    /// Produces a [`Value`] from an in-memory string, or an error on failure.
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Produces a [`Value`] from any [`Read`] source, or an error on failure.
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
620
/// Interface implemented by format backends that render a [`Value`] as text.
///
/// NOTE(review): the reason for `#[allow(dead_code)]` is not visible here;
/// presumably some methods go unused under certain feature combinations —
/// confirm before removing it.
#[allow(dead_code)]
pub trait FormatWriter {
    /// Renders `value` into a newly allocated `String`, or returns an error.
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Renders `value` into any [`Write`] sink, or returns an error.
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
632
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses a format name, panicking if it is rejected.
    fn parse(name: &str) -> Format {
        Format::from_str(name).unwrap()
    }

    /// Runs extension-based detection on a file name, panicking on failure.
    fn detect(file: &str) -> Format {
        detect_format(&PathBuf::from(file)).unwrap()
    }

    /// Runs content sniffing, panicking on failure.
    fn sniff(content: &str) -> (Format, Option<char>) {
        detect_format_from_content(content).unwrap()
    }

    /// Asserts that every `(name, expected)` pair parses as expected.
    fn assert_parses(cases: &[(&str, Format)]) {
        for &(name, expected) in cases {
            assert_eq!(parse(name), expected, "parsing {:?}", name);
        }
    }

    /// Asserts that every `(file, expected)` pair is detected as expected.
    fn assert_detects(cases: &[(&str, Format)]) {
        for &(file, expected) in cases {
            assert_eq!(detect(file), expected, "detecting {:?}", file);
        }
    }

    // --- Format::from_str ---

    #[test]
    fn test_format_from_str() {
        assert_parses(&[
            ("json", Format::Json),
            ("JSON", Format::Json),
            ("csv", Format::Csv),
            ("tsv", Format::Csv),
            ("TSV", Format::Csv),
            ("yaml", Format::Yaml),
            ("yml", Format::Yaml),
            ("toml", Format::Toml),
        ]);
    }

    #[test]
    fn test_format_from_str_jsonl() {
        assert_parses(&[
            ("jsonl", Format::Jsonl),
            ("jsonlines", Format::Jsonl),
            ("ndjson", Format::Jsonl),
            ("JSONL", Format::Jsonl),
        ]);
    }

    #[test]
    fn test_format_from_str_xml() {
        assert_parses(&[("xml", Format::Xml)]);
    }

    #[test]
    fn test_format_from_str_msgpack() {
        assert_parses(&[
            ("msgpack", Format::Msgpack),
            ("messagepack", Format::Msgpack),
        ]);
    }

    #[test]
    fn test_format_from_str_markdown() {
        assert_parses(&[
            ("md", Format::Markdown),
            ("markdown", Format::Markdown),
            ("MD", Format::Markdown),
        ]);
    }

    #[test]
    fn test_format_from_str_unknown() {
        match Format::from_str("bin") {
            Err(DkitError::UnknownFormat(name)) => assert_eq!(name, "bin"),
            other => panic!("expected UnknownFormat, got {:?}", other),
        }
    }

    // --- Display ---

    #[test]
    fn test_format_display() {
        let cases = [
            (Format::Json, "JSON"),
            (Format::Csv, "CSV"),
            (Format::Yaml, "YAML"),
            (Format::Toml, "TOML"),
            (Format::Jsonl, "JSONL"),
            (Format::Xml, "XML"),
            (Format::Msgpack, "MessagePack"),
            (Format::Markdown, "Markdown"),
            (Format::Table, "Table"),
        ];
        for &(format, label) in &cases {
            assert_eq!(format.to_string(), label);
        }
    }

    #[test]
    fn test_format_from_str_table() {
        assert_parses(&[("table", Format::Table), ("TABLE", Format::Table)]);
    }

    #[test]
    fn test_list_output_formats() {
        let names: Vec<&str> = Format::list_output_formats()
            .into_iter()
            .map(|(name, _)| name)
            .collect();
        assert!(names.len() >= 10);
        assert!(names.contains(&"table"));
        assert!(names.contains(&"json"));
    }

    // --- detect_format ---

    #[test]
    fn test_detect_format_json() {
        assert_detects(&[("data.json", Format::Json)]);
    }

    #[test]
    fn test_detect_format_csv_tsv() {
        assert_detects(&[("data.csv", Format::Csv), ("data.tsv", Format::Csv)]);
    }

    #[test]
    fn test_detect_format_yaml() {
        assert_detects(&[("data.yaml", Format::Yaml), ("data.yml", Format::Yaml)]);
    }

    #[test]
    fn test_detect_format_toml() {
        assert_detects(&[("config.toml", Format::Toml)]);
    }

    #[test]
    fn test_detect_format_jsonl() {
        assert_detects(&[("data.jsonl", Format::Jsonl), ("data.ndjson", Format::Jsonl)]);
    }

    #[test]
    fn test_detect_format_xml() {
        assert_detects(&[("data.xml", Format::Xml)]);
    }

    #[test]
    fn test_detect_format_msgpack() {
        assert_detects(&[("data.msgpack", Format::Msgpack)]);
    }

    #[test]
    fn test_detect_format_markdown() {
        assert_detects(&[("output.md", Format::Markdown)]);
    }

    #[test]
    fn test_detect_format_unknown_ext() {
        match detect_format(&PathBuf::from("data.bin")) {
            Err(DkitError::UnknownFormat(ext)) => assert_eq!(ext, "bin"),
            other => panic!("expected UnknownFormat, got {:?}", other),
        }
    }

    #[test]
    fn test_detect_format_no_extension() {
        match detect_format(&PathBuf::from("Makefile")) {
            Err(DkitError::UnknownFormat(ext)) => assert_eq!(ext, "(no extension)"),
            other => panic!("expected UnknownFormat, got {:?}", other),
        }
    }

    // --- default delimiters ---

    #[test]
    fn test_default_delimiter_tsv() {
        let path = PathBuf::from("data.tsv");
        assert_eq!(default_delimiter(&path), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_csv() {
        let path = PathBuf::from("data.csv");
        assert_eq!(default_delimiter(&path), None);
    }

    #[test]
    fn test_default_delimiter_json() {
        let path = PathBuf::from("data.json");
        assert_eq!(default_delimiter(&path), None);
    }

    #[test]
    fn test_default_delimiter_for_format_tsv() {
        for name in &["tsv", "TSV"] {
            assert_eq!(default_delimiter_for_format(name), Some('\t'));
        }
    }

    #[test]
    fn test_default_delimiter_for_format_csv() {
        assert_eq!(default_delimiter_for_format("csv"), None);
    }

    // --- FormatOptions ---

    #[test]
    fn test_format_options_default() {
        let FormatOptions {
            delimiter,
            no_header,
            pretty,
            compact,
            flow_style,
            root_element,
            ..
        } = FormatOptions::default();
        assert_eq!(delimiter, None);
        assert!(!no_header);
        assert!(pretty);
        assert!(!compact);
        assert!(!flow_style);
        assert_eq!(root_element, None);
    }

    // --- detect_format_from_content ---

    #[test]
    fn test_sniff_xml_declaration() {
        assert_eq!(
            sniff("<?xml version=\"1.0\"?>\n<root/>"),
            (Format::Xml, None)
        );
    }

    #[test]
    fn test_sniff_xml_tag() {
        assert_eq!(sniff("<root><item>hello</item></root>").0, Format::Xml);
    }

    #[test]
    fn test_sniff_json_object() {
        assert_eq!(sniff("{\"name\": \"Alice\"}").0, Format::Json);
    }

    #[test]
    fn test_sniff_json_array() {
        assert_eq!(sniff("[1, 2, 3]").0, Format::Json);
    }

    #[test]
    fn test_sniff_jsonl() {
        let input = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
        assert_eq!(sniff(input).0, Format::Jsonl);
    }

    #[test]
    fn test_sniff_tsv() {
        let input = "name\tage\tcity\nAlice\t30\tSeoul\n";
        assert_eq!(sniff(input), (Format::Csv, Some('\t')));
    }

    #[test]
    fn test_sniff_toml_section() {
        let input = "[database]\nhost = \"localhost\"\nport = 5432\n";
        assert_eq!(sniff(input).0, Format::Toml);
    }

    #[test]
    fn test_sniff_toml_key_value() {
        let input = "title = \"My App\"\nversion = \"1.0\"\n";
        assert_eq!(sniff(input).0, Format::Toml);
    }

    #[test]
    fn test_sniff_yaml_document() {
        let input = "---\nname: Alice\nage: 30\n";
        assert_eq!(sniff(input).0, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_key_value() {
        let input = "name: Alice\nage: 30\n";
        assert_eq!(sniff(input).0, Format::Yaml);
    }

    #[test]
    fn test_sniff_csv() {
        let input = "name,age,city\nAlice,30,Seoul\n";
        assert_eq!(sniff(input), (Format::Csv, None));
    }

    #[test]
    fn test_sniff_empty_content() {
        match detect_format_from_content("") {
            Err(DkitError::FormatDetectionFailed(_)) => {}
            other => panic!("expected FormatDetectionFailed, got {:?}", other),
        }
    }

    #[test]
    fn test_sniff_whitespace_only() {
        match detect_format_from_content(" \n \n") {
            Err(DkitError::FormatDetectionFailed(_)) => {}
            other => panic!("expected FormatDetectionFailed, got {:?}", other),
        }
    }
}