1pub mod csv;
3pub mod html;
5pub mod json;
7pub mod jsonl;
9pub mod markdown;
11pub mod toml;
13pub mod yaml;
15
16#[cfg(feature = "msgpack")]
20pub mod msgpack;
21#[cfg(not(feature = "msgpack"))]
22pub mod msgpack {
23 use super::{FormatReader, FormatWriter};
25 use crate::value::Value;
26 use std::io::{Read, Write};
27
28 const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n Install with: cargo install dkit --features msgpack";
29
30 pub struct MsgpackReader;
31 impl MsgpackReader {
32 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
33 anyhow::bail!(MSG)
34 }
35 }
36 impl FormatReader for MsgpackReader {
37 fn read(&self, _: &str) -> anyhow::Result<Value> {
38 anyhow::bail!(MSG)
39 }
40 fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
41 anyhow::bail!(MSG)
42 }
43 }
44 pub struct MsgpackWriter;
45 impl MsgpackWriter {
46 pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
47 anyhow::bail!(MSG)
48 }
49 }
50 impl FormatWriter for MsgpackWriter {
51 fn write(&self, _: &Value) -> anyhow::Result<String> {
52 anyhow::bail!(MSG)
53 }
54 fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
55 anyhow::bail!(MSG)
56 }
57 }
58}
59
60#[cfg(feature = "parquet")]
62pub mod parquet;
63#[cfg(not(feature = "parquet"))]
64pub mod parquet {
65 use crate::value::Value;
67
68 const MSG: &str = "Parquet support requires the 'parquet' feature.\n Install with: cargo install dkit --features parquet";
69
70 #[derive(Debug, Clone, Default)]
71 pub struct ParquetOptions {
72 pub row_group: Option<usize>,
73 }
74 pub struct ParquetReader {
75 _options: ParquetOptions,
76 }
77 impl ParquetReader {
78 pub fn new(options: ParquetOptions) -> Self {
79 Self { _options: options }
80 }
81 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
82 anyhow::bail!(MSG)
83 }
84 #[allow(dead_code)]
85 pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
86 anyhow::bail!(MSG)
87 }
88 }
89 #[allow(dead_code)]
90 pub struct ParquetMetadata {
91 pub num_rows: usize,
92 pub num_row_groups: usize,
93 pub columns: Vec<String>,
94 pub column_types: Vec<String>,
95 }
96 #[derive(Debug, Clone, Default)]
97 pub enum ParquetCompression {
98 #[default]
99 None,
100 Snappy,
101 Gzip,
102 Zstd,
103 }
104 impl std::str::FromStr for ParquetCompression {
105 type Err = anyhow::Error;
106 fn from_str(s: &str) -> anyhow::Result<Self> {
107 match s.to_lowercase().as_str() {
108 "none" | "uncompressed" => Ok(Self::None),
109 "snappy" => Ok(Self::Snappy),
110 "gzip" => Ok(Self::Gzip),
111 "zstd" => Ok(Self::Zstd),
112 _ => anyhow::bail!(
113 "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
114 s
115 ),
116 }
117 }
118 }
119 #[derive(Debug, Clone, Default)]
120 pub struct ParquetWriteOptions {
121 pub compression: ParquetCompression,
122 pub row_group_size: Option<usize>,
123 }
124 pub struct ParquetWriter {
125 _options: ParquetWriteOptions,
126 }
127 impl ParquetWriter {
128 pub fn new(options: ParquetWriteOptions) -> Self {
129 Self { _options: options }
130 }
131 pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
132 anyhow::bail!(MSG)
133 }
134 }
135 pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
137 Value::Null
138 }
139}
140
141#[cfg(feature = "sqlite")]
143pub mod sqlite;
144#[cfg(not(feature = "sqlite"))]
145pub mod sqlite {
146 use crate::value::Value;
148 use std::path::Path;
149
150 const MSG: &str = "SQLite support requires the 'sqlite' feature.\n Install with: cargo install dkit --features sqlite";
151
152 #[derive(Debug, Clone, Default)]
153 pub struct SqliteOptions {
154 pub table: Option<String>,
155 pub sql: Option<String>,
156 }
157 pub struct SqliteReader {
158 _options: SqliteOptions,
159 }
160 impl SqliteReader {
161 pub fn new(options: SqliteOptions) -> Self {
162 Self { _options: options }
163 }
164 pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
165 anyhow::bail!(MSG)
166 }
167 pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
168 anyhow::bail!(MSG)
169 }
170 }
171}
172
173#[cfg(feature = "excel")]
175pub mod xlsx;
176#[cfg(not(feature = "excel"))]
177pub mod xlsx {
178 use crate::value::Value;
180
181 const MSG: &str = "Excel support requires the 'excel' feature.\n Install with: cargo install dkit --features excel";
182
183 #[derive(Debug, Clone, Default)]
184 pub struct XlsxOptions {
185 pub sheet: Option<String>,
186 pub header_row: usize,
187 }
188 pub struct XlsxReader {
189 _options: XlsxOptions,
190 }
191 impl XlsxReader {
192 pub fn new(options: XlsxOptions) -> Self {
193 Self { _options: options }
194 }
195 pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
196 anyhow::bail!(MSG)
197 }
198 pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
199 anyhow::bail!(MSG)
200 }
201 }
202}
203
204#[cfg(feature = "xml")]
206pub mod xml;
207#[cfg(not(feature = "xml"))]
208pub mod xml {
209 use super::{FormatReader, FormatWriter};
211 use crate::value::Value;
212 use std::io::{Read, Write};
213
214 const MSG: &str = "XML support requires the 'xml' feature.\n Install with: cargo install dkit --features xml";
215
216 #[derive(Default)]
217 pub struct XmlReader {
218 _private: (),
219 }
220 impl XmlReader {
221 #[allow(dead_code)]
222 pub fn new(_strip_namespaces: bool) -> Self {
223 Self { _private: () }
224 }
225 }
226 impl FormatReader for XmlReader {
227 fn read(&self, _: &str) -> anyhow::Result<Value> {
228 anyhow::bail!(MSG)
229 }
230 fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
231 anyhow::bail!(MSG)
232 }
233 }
234 pub struct XmlWriter {
235 _private: (),
236 }
237 impl XmlWriter {
238 pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
239 Self { _private: () }
240 }
241 }
242 impl FormatWriter for XmlWriter {
243 fn write(&self, _: &Value) -> anyhow::Result<String> {
244 anyhow::bail!(MSG)
245 }
246 fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
247 anyhow::bail!(MSG)
248 }
249 }
250}
251
252use std::io::{Read, Write};
253use std::path::Path;
254
255use crate::error::DkitError;
256use crate::value::Value;
257
/// Data formats recognised by this module.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it. `Eq` and `Hash` are derived so a format
/// can be used as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON.
    Json,
    /// JSON Lines (one JSON object per line).
    Jsonl,
    /// Delimiter-separated values; covers both CSV and TSV.
    Csv,
    /// YAML.
    Yaml,
    /// TOML.
    Toml,
    /// XML.
    Xml,
    /// MessagePack binary format.
    Msgpack,
    /// Excel spreadsheet.
    Xlsx,
    /// SQLite database.
    Sqlite,
    /// Apache Parquet columnar format.
    Parquet,
    /// Markdown table.
    Markdown,
    /// HTML table.
    Html,
    /// Terminal table.
    Table,
}
292
293impl Format {
294 #[allow(clippy::should_implement_trait)]
295 pub fn from_str(s: &str) -> Result<Self, DkitError> {
296 match s.to_lowercase().as_str() {
297 "json" => Ok(Format::Json),
298 "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
299 "csv" | "tsv" => Ok(Format::Csv),
300 "yaml" | "yml" => Ok(Format::Yaml),
301 "toml" => Ok(Format::Toml),
302 "xml" => Ok(Format::Xml),
303 "msgpack" | "messagepack" => Ok(Format::Msgpack),
304 "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
305 "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
306 "parquet" | "pq" => Ok(Format::Parquet),
307 "md" | "markdown" => Ok(Format::Markdown),
308 "html" => Ok(Format::Html),
309 "table" => Ok(Format::Table),
310 _ => Err(DkitError::UnknownFormat(s.to_string())),
311 }
312 }
313
314 pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
316 let mut formats = vec![
317 ("json", "JSON format"),
318 ("csv", "Comma-separated values"),
319 ("tsv", "Tab-separated values (CSV variant)"),
320 ("yaml", "YAML format"),
321 ("toml", "TOML format"),
322 ("jsonl", "JSON Lines (one JSON object per line)"),
323 ];
324
325 if cfg!(feature = "xml") {
326 formats.push(("xml", "XML format"));
327 } else {
328 formats.push(("xml", "XML format (requires --features xml)"));
329 }
330 if cfg!(feature = "msgpack") {
331 formats.push(("msgpack", "MessagePack binary format"));
332 } else {
333 formats.push((
334 "msgpack",
335 "MessagePack binary format (requires --features msgpack)",
336 ));
337 }
338 if cfg!(feature = "excel") {
339 formats.push(("xlsx", "Excel spreadsheet (input only)"));
340 } else {
341 formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
342 }
343 if cfg!(feature = "sqlite") {
344 formats.push(("sqlite", "SQLite database (input only)"));
345 } else {
346 formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
347 }
348 if cfg!(feature = "parquet") {
349 formats.push(("parquet", "Apache Parquet columnar format"));
350 } else {
351 formats.push((
352 "parquet",
353 "Apache Parquet columnar format (requires --features parquet)",
354 ));
355 }
356
357 formats.push(("md", "Markdown table"));
358 formats.push(("html", "HTML table"));
359 formats.push(("table", "Terminal table (default for view)"));
360
361 formats
362 }
363}
364
365impl std::fmt::Display for Format {
366 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367 match self {
368 Format::Json => write!(f, "JSON"),
369 Format::Jsonl => write!(f, "JSONL"),
370 Format::Csv => write!(f, "CSV"),
371 Format::Yaml => write!(f, "YAML"),
372 Format::Toml => write!(f, "TOML"),
373 Format::Xml => write!(f, "XML"),
374 Format::Msgpack => write!(f, "MessagePack"),
375 Format::Xlsx => write!(f, "Excel"),
376 Format::Sqlite => write!(f, "SQLite"),
377 Format::Parquet => write!(f, "Parquet"),
378 Format::Markdown => write!(f, "Markdown"),
379 Format::Html => write!(f, "HTML"),
380 Format::Table => write!(f, "Table"),
381 }
382 }
383}
384
385pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
387 match path.extension().and_then(|e| e.to_str()) {
388 Some("json") => Ok(Format::Json),
389 Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
390 Some("csv" | "tsv") => Ok(Format::Csv),
391 Some("yaml" | "yml") => Ok(Format::Yaml),
392 Some("toml") => Ok(Format::Toml),
393 Some("xml") => Ok(Format::Xml),
394 Some("msgpack") => Ok(Format::Msgpack),
395 Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
396 Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
397 Some("parquet" | "pq") => Ok(Format::Parquet),
398 Some("md") => Ok(Format::Markdown),
399 Some("html") => Ok(Format::Html),
400 Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
401 None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
402 }
403}
404
405pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
415 let trimmed = content.trim_start();
416
417 if trimmed.is_empty() {
418 return Err(DkitError::FormatDetectionFailed(
419 "input is empty".to_string(),
420 ));
421 }
422
423 if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
425 return Ok((Format::Xml, None));
426 }
427
428 let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
430 if let Some(first_line) = lines.next() {
431 if let Some(second_line) = lines.next() {
432 let first_trimmed = first_line.trim();
433 let second_trimmed = second_line.trim();
434 if first_trimmed.starts_with('{')
435 && first_trimmed.ends_with('}')
436 && second_trimmed.starts_with('{')
437 && second_trimmed.ends_with('}')
438 {
439 return Ok((Format::Jsonl, None));
440 }
441 }
442 }
443
444 if trimmed.starts_with('{') {
446 return Ok((Format::Json, None));
447 }
448
449 if trimmed.starts_with('[') {
453 let first_line = trimmed.lines().next().unwrap_or("").trim();
454 let is_toml_section = first_line.starts_with("[[")
456 || (first_line.starts_with('[')
457 && first_line.ends_with(']')
458 && !first_line.contains(',')
459 && first_line[1..first_line.len() - 1].chars().all(|c| {
460 c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
461 }));
462 if is_toml_section {
463 return Ok((Format::Toml, None));
464 }
465 return Ok((Format::Json, None));
466 }
467
468 if trimmed.starts_with('<') {
470 return Ok((Format::Xml, None));
471 }
472
473 if let Some(first_line) = trimmed.lines().next() {
475 if first_line.contains('\t') {
476 return Ok((Format::Csv, Some('\t')));
477 }
478 }
479
480 let first_line = trimmed.lines().next().unwrap_or("");
482 let ft = first_line.trim();
483 if ft.contains(" = ") {
484 return Ok((Format::Toml, None));
485 }
486
487 if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
489 return Ok((Format::Yaml, None));
490 }
491
492 if ft.contains(',') {
494 return Ok((Format::Csv, None));
495 }
496
497 Err(DkitError::FormatDetectionFailed(
498 "could not determine format from content".to_string(),
499 ))
500}
501
/// Returns the delimiter implied by the file extension of `path`.
///
/// A `tsv` extension (compared ASCII case-insensitively, so `.TSV` also
/// matches) yields `Some('\t')`; anything else, including a missing or
/// non-UTF-8 extension, yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    path.extension()
        .and_then(|ext| ext.to_str())
        .filter(|ext| ext.eq_ignore_ascii_case("tsv"))
        .map(|_| '\t')
}
510
/// Returns the delimiter implied by a format name.
///
/// `tsv` (in any letter case) yields `Some('\t')`; every other name yields
/// `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    if format_str.to_lowercase() == "tsv" {
        Some('\t')
    } else {
        None
    }
}
518
/// Options shared by the format readers and writers.
///
/// `PartialEq`/`Eq` are derived so option sets can be compared directly.
/// Field meanings below are inferred from the field names — confirm against
/// the individual format modules.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatOptions {
    /// Field delimiter override; `None` by default.
    pub delimiter: Option<char>,
    /// Presumably marks delimited input/output as having no header row; `false` by default.
    pub no_header: bool,
    /// Presumably requests pretty-printed output; `true` by default.
    pub pretty: bool,
    /// Presumably requests compact output; `false` by default.
    pub compact: bool,
    /// Presumably selects YAML flow style; `false` by default.
    pub flow_style: bool,
    /// Presumably the root element name for XML output; `None` by default.
    pub root_element: Option<String>,
    /// Presumably enables styling of HTML output; `false` by default.
    pub styled: bool,
    /// Presumably emits a complete HTML document; `false` by default.
    pub full_html: bool,
}

impl Default for FormatOptions {
    /// Every flag is off and every override unset, except `pretty`, which
    /// defaults to `true`.
    fn default() -> Self {
        FormatOptions {
            // The only option that is on by default.
            pretty: true,
            delimiter: None,
            root_element: None,
            no_header: false,
            compact: false,
            flow_style: false,
            styled: false,
            full_html: false,
        }
    }
}
556
557#[allow(dead_code)]
561pub trait FormatReader {
562 fn read(&self, input: &str) -> anyhow::Result<Value>;
564
565 fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
567}
568
569#[allow(dead_code)]
573pub trait FormatWriter {
574 fn write(&self, value: &Value) -> anyhow::Result<String>;
576
577 fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
579}
580
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Runs extension-based detection on a bare file name.
    fn by_ext(name: &str) -> Result<Format, DkitError> {
        detect_format(Path::new(name))
    }

    /// Sniffs `content`, panicking when detection fails.
    fn sniff(content: &str) -> (Format, Option<char>) {
        detect_format_from_content(content).unwrap()
    }

    /// Asserts that every alias parses to `expected`.
    fn assert_parses(aliases: &[&str], expected: Format) {
        for alias in aliases {
            assert_eq!(
                Format::from_str(alias).unwrap(),
                expected,
                "alias {:?}",
                alias
            );
        }
    }

    /// Asserts that every file name is detected as `expected`.
    fn assert_detects(names: &[&str], expected: Format) {
        for name in names {
            assert_eq!(by_ext(name).unwrap(), expected, "file {:?}", name);
        }
    }

    // --- Format::from_str ---

    #[test]
    fn test_format_from_str() {
        assert_parses(&["json", "JSON"], Format::Json);
        assert_parses(&["csv", "tsv", "TSV"], Format::Csv);
        assert_parses(&["yaml", "yml"], Format::Yaml);
        assert_parses(&["toml"], Format::Toml);
    }

    #[test]
    fn test_format_from_str_jsonl() {
        assert_parses(&["jsonl", "jsonlines", "ndjson", "JSONL"], Format::Jsonl);
    }

    #[test]
    fn test_format_from_str_xml() {
        assert_parses(&["xml"], Format::Xml);
    }

    #[test]
    fn test_format_from_str_msgpack() {
        assert_parses(&["msgpack", "messagepack"], Format::Msgpack);
    }

    #[test]
    fn test_format_from_str_markdown() {
        assert_parses(&["md", "markdown", "MD"], Format::Markdown);
    }

    #[test]
    fn test_format_from_str_table() {
        assert_parses(&["table", "TABLE"], Format::Table);
    }

    #[test]
    fn test_format_from_str_xlsx_sqlite_parquet_html() {
        assert_parses(&["xlsx", "excel", "xls"], Format::Xlsx);
        assert_parses(&["sqlite", "sqlite3", "db"], Format::Sqlite);
        assert_parses(&["parquet", "pq"], Format::Parquet);
        assert_parses(&["html", "HTML"], Format::Html);
    }

    #[test]
    fn test_format_from_str_unknown() {
        let err = Format::from_str("bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    // --- Display ---

    #[test]
    fn test_format_display() {
        let expected = [
            (Format::Json, "JSON"),
            (Format::Csv, "CSV"),
            (Format::Yaml, "YAML"),
            (Format::Toml, "TOML"),
            (Format::Jsonl, "JSONL"),
            (Format::Xml, "XML"),
            (Format::Msgpack, "MessagePack"),
            (Format::Xlsx, "Excel"),
            (Format::Sqlite, "SQLite"),
            (Format::Parquet, "Parquet"),
            (Format::Markdown, "Markdown"),
            (Format::Html, "HTML"),
            (Format::Table, "Table"),
        ];
        for (format, label) in expected {
            assert_eq!(format.to_string(), label);
        }
    }

    // --- list_output_formats ---

    #[test]
    fn test_list_output_formats() {
        let formats = Format::list_output_formats();
        assert!(formats.len() >= 10);
        let names = [
            "json", "csv", "tsv", "yaml", "toml", "jsonl", "xml", "msgpack", "xlsx", "sqlite",
            "parquet", "md", "html", "table",
        ];
        for wanted in names {
            let hits = formats.iter().filter(|(name, _)| *name == wanted).count();
            assert_eq!(hits, 1, "format {:?} should be listed exactly once", wanted);
        }
    }

    // --- detect_format ---

    #[test]
    fn test_detect_format_json() {
        assert_detects(&["data.json"], Format::Json);
    }

    #[test]
    fn test_detect_format_csv_tsv() {
        assert_detects(&["data.csv", "data.tsv"], Format::Csv);
    }

    #[test]
    fn test_detect_format_yaml() {
        assert_detects(&["data.yaml", "data.yml"], Format::Yaml);
    }

    #[test]
    fn test_detect_format_toml() {
        assert_detects(&["config.toml"], Format::Toml);
    }

    #[test]
    fn test_detect_format_jsonl() {
        assert_detects(&["data.jsonl", "data.ndjson"], Format::Jsonl);
    }

    #[test]
    fn test_detect_format_xml() {
        assert_detects(&["data.xml"], Format::Xml);
    }

    #[test]
    fn test_detect_format_msgpack() {
        assert_detects(&["data.msgpack"], Format::Msgpack);
    }

    #[test]
    fn test_detect_format_markdown() {
        assert_detects(&["output.md"], Format::Markdown);
    }

    #[test]
    fn test_detect_format_xlsx_sqlite_parquet_html() {
        assert_detects(
            &["b.xlsx", "b.xls", "b.xlsm", "b.xlsb", "b.ods"],
            Format::Xlsx,
        );
        assert_detects(&["app.db", "app.sqlite", "app.sqlite3"], Format::Sqlite);
        assert_detects(&["t.parquet", "t.pq"], Format::Parquet);
        assert_detects(&["page.html"], Format::Html);
    }

    #[test]
    fn test_detect_format_unknown_ext() {
        let err = by_ext("data.bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    #[test]
    fn test_detect_format_no_extension() {
        let err = by_ext("Makefile").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
    }

    // --- default delimiters ---

    #[test]
    fn test_default_delimiter_tsv() {
        assert_eq!(default_delimiter(Path::new("data.tsv")), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_csv() {
        assert_eq!(default_delimiter(Path::new("data.csv")), None);
    }

    #[test]
    fn test_default_delimiter_json() {
        assert_eq!(default_delimiter(Path::new("data.json")), None);
    }

    #[test]
    fn test_default_delimiter_for_format_tsv() {
        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_for_format_csv() {
        assert_eq!(default_delimiter_for_format("csv"), None);
    }

    // --- FormatOptions ---

    #[test]
    fn test_format_options_default() {
        let opts = FormatOptions::default();
        assert_eq!(opts.delimiter, None);
        assert!(!opts.no_header);
        assert!(opts.pretty);
        assert!(!opts.compact);
        assert!(!opts.flow_style);
        assert_eq!(opts.root_element, None);
        assert!(!opts.styled);
        assert!(!opts.full_html);
    }

    // --- detect_format_from_content ---

    #[test]
    fn test_sniff_xml_declaration() {
        assert_eq!(
            sniff("<?xml version=\"1.0\"?>\n<root/>"),
            (Format::Xml, None)
        );
    }

    #[test]
    fn test_sniff_xml_doctype() {
        assert_eq!(sniff("<!DOCTYPE note>\n<note/>"), (Format::Xml, None));
    }

    #[test]
    fn test_sniff_xml_tag() {
        assert_eq!(sniff("<root><item>hello</item></root>").0, Format::Xml);
    }

    #[test]
    fn test_sniff_json_object() {
        assert_eq!(sniff("{\"name\": \"Alice\"}").0, Format::Json);
    }

    #[test]
    fn test_sniff_json_array() {
        assert_eq!(sniff("[1, 2, 3]").0, Format::Json);
    }

    #[test]
    fn test_sniff_jsonl() {
        assert_eq!(
            sniff("{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n").0,
            Format::Jsonl
        );
    }

    #[test]
    fn test_sniff_tsv() {
        assert_eq!(
            sniff("name\tage\tcity\nAlice\t30\tSeoul\n"),
            (Format::Csv, Some('\t'))
        );
    }

    #[test]
    fn test_sniff_toml_section() {
        assert_eq!(
            sniff("[database]\nhost = \"localhost\"\nport = 5432\n").0,
            Format::Toml
        );
    }

    #[test]
    fn test_sniff_toml_key_value() {
        assert_eq!(
            sniff("title = \"My App\"\nversion = \"1.0\"\n").0,
            Format::Toml
        );
    }

    #[test]
    fn test_sniff_yaml_document() {
        assert_eq!(sniff("---\nname: Alice\nage: 30\n").0, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_key_value() {
        assert_eq!(sniff("name: Alice\nage: 30\n").0, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_trailing_colon() {
        assert_eq!(sniff("items:\n  - a\n  - b\n").0, Format::Yaml);
    }

    #[test]
    fn test_sniff_csv() {
        assert_eq!(
            sniff("name,age,city\nAlice,30,Seoul\n"),
            (Format::Csv, None)
        );
    }

    #[test]
    fn test_sniff_empty_content() {
        let err = detect_format_from_content("").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }

    #[test]
    fn test_sniff_whitespace_only() {
        let err = detect_format_from_content(" \n \n").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }
}