Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// HTML table writer.
4pub mod html;
5/// JSON reader, writer, and value conversion utilities.
6pub mod json;
7/// JSON Lines (NDJSON) reader and writer.
8pub mod jsonl;
9/// Markdown table writer.
10pub mod markdown;
11/// TOML reader and writer.
12pub mod toml;
13/// YAML reader and writer.
14pub mod yaml;
15
16// --- Feature-gated format modules ---
17
18/// MessagePack binary reader and writer.
19#[cfg(feature = "msgpack")]
20pub mod msgpack;
21#[cfg(not(feature = "msgpack"))]
22pub mod msgpack {
23    //! Stub module — MessagePack feature not enabled.
24    use super::{FormatReader, FormatWriter};
25    use crate::value::Value;
26    use std::io::{Read, Write};
27
28    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
29
30    pub struct MsgpackReader;
31    impl MsgpackReader {
32        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
33            anyhow::bail!(MSG)
34        }
35    }
36    impl FormatReader for MsgpackReader {
37        fn read(&self, _: &str) -> anyhow::Result<Value> {
38            anyhow::bail!(MSG)
39        }
40        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
41            anyhow::bail!(MSG)
42        }
43    }
44    pub struct MsgpackWriter;
45    impl MsgpackWriter {
46        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
47            anyhow::bail!(MSG)
48        }
49    }
50    impl FormatWriter for MsgpackWriter {
51        fn write(&self, _: &Value) -> anyhow::Result<String> {
52            anyhow::bail!(MSG)
53        }
54        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
55            anyhow::bail!(MSG)
56        }
57    }
58}
59
60/// Apache Parquet columnar format reader and writer.
61#[cfg(feature = "parquet")]
62pub mod parquet;
63#[cfg(not(feature = "parquet"))]
64pub mod parquet {
65    //! Stub module — Parquet feature not enabled.
66    use crate::value::Value;
67
68    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
69
70    #[derive(Debug, Clone, Default)]
71    pub struct ParquetOptions {
72        pub row_group: Option<usize>,
73    }
74    pub struct ParquetReader {
75        _options: ParquetOptions,
76    }
77    impl ParquetReader {
78        pub fn new(options: ParquetOptions) -> Self {
79            Self { _options: options }
80        }
81        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
82            anyhow::bail!(MSG)
83        }
84        #[allow(dead_code)]
85        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
86            anyhow::bail!(MSG)
87        }
88    }
89    #[allow(dead_code)]
90    pub struct ParquetMetadata {
91        pub num_rows: usize,
92        pub num_row_groups: usize,
93        pub columns: Vec<String>,
94        pub column_types: Vec<String>,
95    }
96    #[derive(Debug, Clone, Default)]
97    pub enum ParquetCompression {
98        #[default]
99        None,
100        Snappy,
101        Gzip,
102        Zstd,
103    }
104    impl std::str::FromStr for ParquetCompression {
105        type Err = anyhow::Error;
106        fn from_str(s: &str) -> anyhow::Result<Self> {
107            match s.to_lowercase().as_str() {
108                "none" | "uncompressed" => Ok(Self::None),
109                "snappy" => Ok(Self::Snappy),
110                "gzip" => Ok(Self::Gzip),
111                "zstd" => Ok(Self::Zstd),
112                _ => anyhow::bail!(
113                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
114                    s
115                ),
116            }
117        }
118    }
119    #[derive(Debug, Clone, Default)]
120    pub struct ParquetWriteOptions {
121        pub compression: ParquetCompression,
122        pub row_group_size: Option<usize>,
123    }
124    pub struct ParquetWriter {
125        _options: ParquetWriteOptions,
126    }
127    impl ParquetWriter {
128        pub fn new(options: ParquetWriteOptions) -> Self {
129            Self { _options: options }
130        }
131        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
132            anyhow::bail!(MSG)
133        }
134    }
135    /// Stub for arrow_value_to_value when parquet feature is disabled.
136    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
137        Value::Null
138    }
139}
140
141/// SQLite database reader.
142#[cfg(feature = "sqlite")]
143pub mod sqlite;
144#[cfg(not(feature = "sqlite"))]
145pub mod sqlite {
146    //! Stub module — SQLite feature not enabled.
147    use crate::value::Value;
148    use std::path::Path;
149
150    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
151
152    #[derive(Debug, Clone, Default)]
153    pub struct SqliteOptions {
154        pub table: Option<String>,
155        pub sql: Option<String>,
156    }
157    pub struct SqliteReader {
158        _options: SqliteOptions,
159    }
160    impl SqliteReader {
161        pub fn new(options: SqliteOptions) -> Self {
162            Self { _options: options }
163        }
164        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
165            anyhow::bail!(MSG)
166        }
167        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
168            anyhow::bail!(MSG)
169        }
170    }
171}
172
173/// Excel (XLSX) reader.
174#[cfg(feature = "excel")]
175pub mod xlsx;
176#[cfg(not(feature = "excel"))]
177pub mod xlsx {
178    //! Stub module — Excel feature not enabled.
179    use crate::value::Value;
180
181    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
182
183    #[derive(Debug, Clone, Default)]
184    pub struct XlsxOptions {
185        pub sheet: Option<String>,
186        pub header_row: usize,
187    }
188    pub struct XlsxReader {
189        _options: XlsxOptions,
190    }
191    impl XlsxReader {
192        pub fn new(options: XlsxOptions) -> Self {
193            Self { _options: options }
194        }
195        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
196            anyhow::bail!(MSG)
197        }
198        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
199            anyhow::bail!(MSG)
200        }
201    }
202}
203
204/// XML reader and writer.
205#[cfg(feature = "xml")]
206pub mod xml;
207#[cfg(not(feature = "xml"))]
208pub mod xml {
209    //! Stub module — XML feature not enabled.
210    use super::{FormatReader, FormatWriter};
211    use crate::value::Value;
212    use std::io::{Read, Write};
213
214    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
215
216    #[derive(Default)]
217    pub struct XmlReader {
218        _private: (),
219    }
220    impl XmlReader {
221        #[allow(dead_code)]
222        pub fn new(_strip_namespaces: bool) -> Self {
223            Self { _private: () }
224        }
225    }
226    impl FormatReader for XmlReader {
227        fn read(&self, _: &str) -> anyhow::Result<Value> {
228            anyhow::bail!(MSG)
229        }
230        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
231            anyhow::bail!(MSG)
232        }
233    }
234    pub struct XmlWriter {
235        _private: (),
236    }
237    impl XmlWriter {
238        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
239            Self { _private: () }
240        }
241    }
242    impl FormatWriter for XmlWriter {
243        fn write(&self, _: &Value) -> anyhow::Result<String> {
244            anyhow::bail!(MSG)
245        }
246        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
247            anyhow::bail!(MSG)
248        }
249    }
250}
251
252use std::io::{Read, Write};
253use std::path::Path;
254
255use crate::error::DkitError;
256use crate::value::Value;
257
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The enum is a plain, fieldless tag, so it is `Copy` and implements full
/// equality and hashing; it can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel spreadsheet (`*.xlsx`, read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`)
    Parquet,
    /// Markdown table (write-only)
    Markdown,
    /// HTML table (write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
}
292
293impl Format {
294    #[allow(clippy::should_implement_trait)]
295    pub fn from_str(s: &str) -> Result<Self, DkitError> {
296        match s.to_lowercase().as_str() {
297            "json" => Ok(Format::Json),
298            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
299            "csv" | "tsv" => Ok(Format::Csv),
300            "yaml" | "yml" => Ok(Format::Yaml),
301            "toml" => Ok(Format::Toml),
302            "xml" => Ok(Format::Xml),
303            "msgpack" | "messagepack" => Ok(Format::Msgpack),
304            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
305            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
306            "parquet" | "pq" => Ok(Format::Parquet),
307            "md" | "markdown" => Ok(Format::Markdown),
308            "html" => Ok(Format::Html),
309            "table" => Ok(Format::Table),
310            _ => Err(DkitError::UnknownFormat(s.to_string())),
311        }
312    }
313
314    /// 사용 가능한 출력 포맷 목록을 반환한다
315    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
316        let mut formats = vec![
317            ("json", "JSON format"),
318            ("csv", "Comma-separated values"),
319            ("tsv", "Tab-separated values (CSV variant)"),
320            ("yaml", "YAML format"),
321            ("toml", "TOML format"),
322            ("jsonl", "JSON Lines (one JSON object per line)"),
323        ];
324
325        if cfg!(feature = "xml") {
326            formats.push(("xml", "XML format"));
327        } else {
328            formats.push(("xml", "XML format (requires --features xml)"));
329        }
330        if cfg!(feature = "msgpack") {
331            formats.push(("msgpack", "MessagePack binary format"));
332        } else {
333            formats.push((
334                "msgpack",
335                "MessagePack binary format (requires --features msgpack)",
336            ));
337        }
338        if cfg!(feature = "excel") {
339            formats.push(("xlsx", "Excel spreadsheet (input only)"));
340        } else {
341            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
342        }
343        if cfg!(feature = "sqlite") {
344            formats.push(("sqlite", "SQLite database (input only)"));
345        } else {
346            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
347        }
348        if cfg!(feature = "parquet") {
349            formats.push(("parquet", "Apache Parquet columnar format"));
350        } else {
351            formats.push((
352                "parquet",
353                "Apache Parquet columnar format (requires --features parquet)",
354            ));
355        }
356
357        formats.push(("md", "Markdown table"));
358        formats.push(("html", "HTML table"));
359        formats.push(("table", "Terminal table (default for view)"));
360
361        formats
362    }
363}
364
365impl std::fmt::Display for Format {
366    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
367        match self {
368            Format::Json => write!(f, "JSON"),
369            Format::Jsonl => write!(f, "JSONL"),
370            Format::Csv => write!(f, "CSV"),
371            Format::Yaml => write!(f, "YAML"),
372            Format::Toml => write!(f, "TOML"),
373            Format::Xml => write!(f, "XML"),
374            Format::Msgpack => write!(f, "MessagePack"),
375            Format::Xlsx => write!(f, "Excel"),
376            Format::Sqlite => write!(f, "SQLite"),
377            Format::Parquet => write!(f, "Parquet"),
378            Format::Markdown => write!(f, "Markdown"),
379            Format::Html => write!(f, "HTML"),
380            Format::Table => write!(f, "Table"),
381        }
382    }
383}
384
385/// 파일 확장자로 포맷을 자동 감지
386pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
387    match path.extension().and_then(|e| e.to_str()) {
388        Some("json") => Ok(Format::Json),
389        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
390        Some("csv" | "tsv") => Ok(Format::Csv),
391        Some("yaml" | "yml") => Ok(Format::Yaml),
392        Some("toml") => Ok(Format::Toml),
393        Some("xml") => Ok(Format::Xml),
394        Some("msgpack") => Ok(Format::Msgpack),
395        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
396        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
397        Some("parquet" | "pq") => Ok(Format::Parquet),
398        Some("md") => Ok(Format::Markdown),
399        Some("html") => Ok(Format::Html),
400        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
401        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
402    }
403}
404
405/// 콘텐츠 스니핑으로 포맷을 자동 감지
406///
407/// 감지 우선순위:
408/// 1. `<?xml` → XML
409/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
410/// 3. `{` 또는 `[` 시작 → JSON
411/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
412/// 5. TOML 패턴 (키 = 값, [섹션])
413/// 6. YAML 패턴 (키: 값, ---)
414pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
415    let trimmed = content.trim_start();
416
417    if trimmed.is_empty() {
418        return Err(DkitError::FormatDetectionFailed(
419            "input is empty".to_string(),
420        ));
421    }
422
423    // XML: <?xml 또는 루트 태그로 시작
424    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
425        return Ok((Format::Xml, None));
426    }
427
428    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
429    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
430    if let Some(first_line) = lines.next() {
431        if let Some(second_line) = lines.next() {
432            let first_trimmed = first_line.trim();
433            let second_trimmed = second_line.trim();
434            if first_trimmed.starts_with('{')
435                && first_trimmed.ends_with('}')
436                && second_trimmed.starts_with('{')
437                && second_trimmed.ends_with('}')
438            {
439                return Ok((Format::Jsonl, None));
440            }
441        }
442    }
443
444    // JSON: { 로 시작 (단일 객체)
445    if trimmed.starts_with('{') {
446        return Ok((Format::Json, None));
447    }
448
449    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
450    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
451    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
452    if trimmed.starts_with('[') {
453        let first_line = trimmed.lines().next().unwrap_or("").trim();
454        // TOML 섹션 헤더: [section] 또는 [[array]]
455        let is_toml_section = first_line.starts_with("[[")
456            || (first_line.starts_with('[')
457                && first_line.ends_with(']')
458                && !first_line.contains(',')
459                && first_line[1..first_line.len() - 1].chars().all(|c| {
460                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
461                }));
462        if is_toml_section {
463            return Ok((Format::Toml, None));
464        }
465        return Ok((Format::Json, None));
466    }
467
468    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
469    if trimmed.starts_with('<') {
470        return Ok((Format::Xml, None));
471    }
472
473    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
474    if let Some(first_line) = trimmed.lines().next() {
475        if first_line.contains('\t') {
476            return Ok((Format::Csv, Some('\t')));
477        }
478    }
479
480    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
481    let first_line = trimmed.lines().next().unwrap_or("");
482    let ft = first_line.trim();
483    if ft.contains(" = ") {
484        return Ok((Format::Toml, None));
485    }
486
487    // YAML: --- 또는 key: value 패턴
488    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
489        return Ok((Format::Yaml, None));
490    }
491
492    // CSV: 콤마가 포함된 구조적 데이터
493    if ft.contains(',') {
494        return Ok((Format::Csv, None));
495    }
496
497    Err(DkitError::FormatDetectionFailed(
498        "could not determine format from content".to_string(),
499    ))
500}
501
/// Returns the default delimiter implied by a file's extension.
///
/// A `.tsv` extension (compared case-insensitively, so `.TSV` also counts)
/// yields a tab; every other path, including one without an extension,
/// yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension().and_then(|e| e.to_str())?;
    ext.eq_ignore_ascii_case("tsv").then_some('\t')
}
510
/// Returns the default delimiter implied by a `--to` format string.
///
/// Only `"tsv"` (in any letter case) selects a tab; all other names give
/// `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    if format_str.to_lowercase() == "tsv" {
        Some('\t')
    } else {
        None
    }
}
518
/// Format-specific options controlling how data is read or written.
///
/// Use [`Default::default()`] to get sensible defaults. Two option sets can
/// be compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatOptions {
    /// CSV delimiter (default: `,`)
    pub delimiter: Option<char>,
    /// CSV no-header mode
    pub no_header: bool,
    /// Pretty-print the output
    pub pretty: bool,
    /// Compact output (JSON)
    pub compact: bool,
    /// YAML inline/flow style
    pub flow_style: bool,
    /// XML root element name (default: "root")
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output
    pub styled: bool,
    /// Emit a complete HTML document
    pub full_html: bool,
}

impl Default for FormatOptions {
    /// Pretty-printing is on; every other flag is off and every optional
    /// setting is unset.
    fn default() -> Self {
        Self {
            delimiter: None,
            no_header: false,
            pretty: true,
            compact: false,
            flow_style: false,
            root_element: None,
            styled: false,
            full_html: false,
        }
    }
}
556
/// Trait for reading a data format into a [`Value`].
///
/// Implement this trait to add support for reading a new data format.
/// Both methods report failures through [`anyhow::Result`].
// NOTE(review): `dead_code` is allowed presumably because some builds do not
// use every method of the trait — confirm before removing.
#[allow(dead_code)]
pub trait FormatReader {
    /// Parse the given string content and return a [`Value`].
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
    ///
    /// The reader is taken by value.
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
568
/// Trait for writing a [`Value`] to a data format.
///
/// Implement this trait to add support for writing a new data format.
/// Both methods report failures through [`anyhow::Result`].
// NOTE(review): `dead_code` is allowed presumably because some builds do not
// use every method of the trait — confirm before removing.
#[allow(dead_code)]
pub trait FormatWriter {
    /// Serialize the given [`Value`] and return the formatted string.
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
    ///
    /// The writer is taken by value.
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
580
/// Unit tests for format-name parsing, extension detection, delimiter
/// defaults, option defaults and content sniffing.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // --- Format::from_str ---

    #[test]
    fn test_format_from_str() {
        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
        assert_eq!(Format::from_str("JSON").unwrap(), Format::Json);
        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("TSV").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("yaml").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("toml").unwrap(), Format::Toml);
    }

    #[test]
    fn test_format_from_str_jsonl() {
        assert_eq!(Format::from_str("jsonl").unwrap(), Format::Jsonl);
        assert_eq!(Format::from_str("jsonlines").unwrap(), Format::Jsonl);
        assert_eq!(Format::from_str("ndjson").unwrap(), Format::Jsonl);
        assert_eq!(Format::from_str("JSONL").unwrap(), Format::Jsonl);
    }

    #[test]
    fn test_format_from_str_xml() {
        assert_eq!(Format::from_str("xml").unwrap(), Format::Xml);
    }

    #[test]
    fn test_format_from_str_msgpack() {
        assert_eq!(Format::from_str("msgpack").unwrap(), Format::Msgpack);
        assert_eq!(Format::from_str("messagepack").unwrap(), Format::Msgpack);
    }

    #[test]
    fn test_format_from_str_markdown() {
        assert_eq!(Format::from_str("md").unwrap(), Format::Markdown);
        assert_eq!(Format::from_str("markdown").unwrap(), Format::Markdown);
        assert_eq!(Format::from_str("MD").unwrap(), Format::Markdown);
    }

    #[test]
    fn test_format_from_str_xlsx() {
        assert_eq!(Format::from_str("xlsx").unwrap(), Format::Xlsx);
        assert_eq!(Format::from_str("excel").unwrap(), Format::Xlsx);
        assert_eq!(Format::from_str("xls").unwrap(), Format::Xlsx);
    }

    #[test]
    fn test_format_from_str_sqlite() {
        assert_eq!(Format::from_str("sqlite").unwrap(), Format::Sqlite);
        assert_eq!(Format::from_str("sqlite3").unwrap(), Format::Sqlite);
        assert_eq!(Format::from_str("db").unwrap(), Format::Sqlite);
    }

    #[test]
    fn test_format_from_str_parquet() {
        assert_eq!(Format::from_str("parquet").unwrap(), Format::Parquet);
        assert_eq!(Format::from_str("pq").unwrap(), Format::Parquet);
    }

    #[test]
    fn test_format_from_str_html() {
        assert_eq!(Format::from_str("html").unwrap(), Format::Html);
        assert_eq!(Format::from_str("HTML").unwrap(), Format::Html);
    }

    #[test]
    fn test_format_from_str_unknown() {
        let err = Format::from_str("bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    // --- Format::Display ---

    #[test]
    fn test_format_display() {
        assert_eq!(Format::Json.to_string(), "JSON");
        assert_eq!(Format::Csv.to_string(), "CSV");
        assert_eq!(Format::Yaml.to_string(), "YAML");
        assert_eq!(Format::Toml.to_string(), "TOML");
        assert_eq!(Format::Jsonl.to_string(), "JSONL");
        assert_eq!(Format::Xml.to_string(), "XML");
        assert_eq!(Format::Msgpack.to_string(), "MessagePack");
        assert_eq!(Format::Xlsx.to_string(), "Excel");
        assert_eq!(Format::Sqlite.to_string(), "SQLite");
        assert_eq!(Format::Parquet.to_string(), "Parquet");
        assert_eq!(Format::Markdown.to_string(), "Markdown");
        assert_eq!(Format::Html.to_string(), "HTML");
        assert_eq!(Format::Table.to_string(), "Table");
    }

    #[test]
    fn test_format_from_str_table() {
        assert_eq!(Format::from_str("table").unwrap(), Format::Table);
        assert_eq!(Format::from_str("TABLE").unwrap(), Format::Table);
    }

    #[test]
    fn test_list_output_formats() {
        let formats = Format::list_output_formats();
        assert!(formats.len() >= 10);
        assert!(formats.iter().any(|(name, _)| *name == "table"));
        assert!(formats.iter().any(|(name, _)| *name == "json"));
    }

    // --- detect_format ---

    #[test]
    fn test_detect_format_json() {
        assert_eq!(
            detect_format(&PathBuf::from("data.json")).unwrap(),
            Format::Json
        );
    }

    #[test]
    fn test_detect_format_csv_tsv() {
        assert_eq!(
            detect_format(&PathBuf::from("data.csv")).unwrap(),
            Format::Csv
        );
        assert_eq!(
            detect_format(&PathBuf::from("data.tsv")).unwrap(),
            Format::Csv
        );
    }

    #[test]
    fn test_detect_format_yaml() {
        assert_eq!(
            detect_format(&PathBuf::from("data.yaml")).unwrap(),
            Format::Yaml
        );
        assert_eq!(
            detect_format(&PathBuf::from("data.yml")).unwrap(),
            Format::Yaml
        );
    }

    #[test]
    fn test_detect_format_toml() {
        assert_eq!(
            detect_format(&PathBuf::from("config.toml")).unwrap(),
            Format::Toml
        );
    }

    #[test]
    fn test_detect_format_jsonl() {
        assert_eq!(
            detect_format(&PathBuf::from("data.jsonl")).unwrap(),
            Format::Jsonl
        );
        assert_eq!(
            detect_format(&PathBuf::from("data.ndjson")).unwrap(),
            Format::Jsonl
        );
    }

    #[test]
    fn test_detect_format_xml() {
        assert_eq!(
            detect_format(&PathBuf::from("data.xml")).unwrap(),
            Format::Xml
        );
    }

    #[test]
    fn test_detect_format_msgpack() {
        assert_eq!(
            detect_format(&PathBuf::from("data.msgpack")).unwrap(),
            Format::Msgpack
        );
    }

    #[test]
    fn test_detect_format_markdown() {
        assert_eq!(
            detect_format(&PathBuf::from("output.md")).unwrap(),
            Format::Markdown
        );
    }

    #[test]
    fn test_detect_format_xlsx() {
        for name in ["book.xlsx", "book.xls", "book.xlsm", "book.xlsb", "book.ods"] {
            assert_eq!(detect_format(&PathBuf::from(name)).unwrap(), Format::Xlsx);
        }
    }

    #[test]
    fn test_detect_format_sqlite() {
        for name in ["app.db", "app.sqlite", "app.sqlite3"] {
            assert_eq!(detect_format(&PathBuf::from(name)).unwrap(), Format::Sqlite);
        }
    }

    #[test]
    fn test_detect_format_parquet() {
        for name in ["data.parquet", "data.pq"] {
            assert_eq!(
                detect_format(&PathBuf::from(name)).unwrap(),
                Format::Parquet
            );
        }
    }

    #[test]
    fn test_detect_format_html() {
        assert_eq!(
            detect_format(&PathBuf::from("report.html")).unwrap(),
            Format::Html
        );
    }

    #[test]
    fn test_detect_format_unknown_ext() {
        let err = detect_format(&PathBuf::from("data.bin")).unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    #[test]
    fn test_detect_format_no_extension() {
        let err = detect_format(&PathBuf::from("Makefile")).unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
    }

    // --- default_delimiter ---

    #[test]
    fn test_default_delimiter_tsv() {
        assert_eq!(default_delimiter(&PathBuf::from("data.tsv")), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_csv() {
        assert_eq!(default_delimiter(&PathBuf::from("data.csv")), None);
    }

    #[test]
    fn test_default_delimiter_json() {
        assert_eq!(default_delimiter(&PathBuf::from("data.json")), None);
    }

    #[test]
    fn test_default_delimiter_for_format_tsv() {
        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_for_format_csv() {
        assert_eq!(default_delimiter_for_format("csv"), None);
    }

    // --- FormatOptions ---

    #[test]
    fn test_format_options_default() {
        let opts = FormatOptions::default();
        assert_eq!(opts.delimiter, None);
        assert!(!opts.no_header);
        assert!(opts.pretty);
        assert!(!opts.compact);
        assert!(!opts.flow_style);
        assert_eq!(opts.root_element, None);
        assert!(!opts.styled);
        assert!(!opts.full_html);
    }

    // --- detect_format_from_content ---

    #[test]
    fn test_sniff_xml_declaration() {
        let (fmt, delim) = detect_format_from_content("<?xml version=\"1.0\"?>\n<root/>").unwrap();
        assert_eq!(fmt, Format::Xml);
        assert_eq!(delim, None);
    }

    #[test]
    fn test_sniff_xml_tag() {
        let (fmt, _) = detect_format_from_content("<root><item>hello</item></root>").unwrap();
        assert_eq!(fmt, Format::Xml);
    }

    #[test]
    fn test_sniff_json_object() {
        let (fmt, _) = detect_format_from_content("{\"name\": \"Alice\"}").unwrap();
        assert_eq!(fmt, Format::Json);
    }

    #[test]
    fn test_sniff_json_array() {
        let (fmt, _) = detect_format_from_content("[1, 2, 3]").unwrap();
        assert_eq!(fmt, Format::Json);
    }

    #[test]
    fn test_sniff_jsonl() {
        let content = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
        let (fmt, _) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Jsonl);
    }

    #[test]
    fn test_sniff_tsv() {
        let content = "name\tage\tcity\nAlice\t30\tSeoul\n";
        let (fmt, delim) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Csv);
        assert_eq!(delim, Some('\t'));
    }

    #[test]
    fn test_sniff_toml_section() {
        let content = "[database]\nhost = \"localhost\"\nport = 5432\n";
        let (fmt, _) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Toml);
    }

    #[test]
    fn test_sniff_toml_key_value() {
        let content = "title = \"My App\"\nversion = \"1.0\"\n";
        let (fmt, _) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Toml);
    }

    #[test]
    fn test_sniff_yaml_document() {
        let content = "---\nname: Alice\nage: 30\n";
        let (fmt, _) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_key_value() {
        let content = "name: Alice\nage: 30\n";
        let (fmt, _) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Yaml);
    }

    #[test]
    fn test_sniff_csv() {
        let content = "name,age,city\nAlice,30,Seoul\n";
        let (fmt, delim) = detect_format_from_content(content).unwrap();
        assert_eq!(fmt, Format::Csv);
        assert_eq!(delim, None);
    }

    #[test]
    fn test_sniff_empty_content() {
        let err = detect_format_from_content("").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }

    #[test]
    fn test_sniff_whitespace_only() {
        let err = detect_format_from_content("   \n  \n").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }
}
883}