Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// .env file reader and writer.
4pub mod env;
5/// HTML table writer.
6pub mod html;
7/// INI/CFG configuration file reader and writer.
8pub mod ini;
9/// JSON reader, writer, and value conversion utilities.
10pub mod json;
11/// JSON Lines (NDJSON) reader and writer.
12pub mod jsonl;
13/// Markdown table writer.
14pub mod markdown;
15/// Java `.properties` file reader and writer.
16pub mod properties;
17/// TOML reader and writer.
18pub mod toml;
19/// YAML reader and writer.
20pub mod yaml;
21
22// --- Feature-gated format modules ---
23
24/// MessagePack binary reader and writer.
25#[cfg(feature = "msgpack")]
26pub mod msgpack;
27#[cfg(not(feature = "msgpack"))]
28pub mod msgpack {
29    //! Stub module — MessagePack feature not enabled.
30    use super::{FormatReader, FormatWriter};
31    use crate::value::Value;
32    use std::io::{Read, Write};
33
34    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
35
36    pub struct MsgpackReader;
37    impl MsgpackReader {
38        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
39            anyhow::bail!(MSG)
40        }
41    }
42    impl FormatReader for MsgpackReader {
43        fn read(&self, _: &str) -> anyhow::Result<Value> {
44            anyhow::bail!(MSG)
45        }
46        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
47            anyhow::bail!(MSG)
48        }
49    }
50    pub struct MsgpackWriter;
51    impl MsgpackWriter {
52        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
53            anyhow::bail!(MSG)
54        }
55    }
56    impl FormatWriter for MsgpackWriter {
57        fn write(&self, _: &Value) -> anyhow::Result<String> {
58            anyhow::bail!(MSG)
59        }
60        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
61            anyhow::bail!(MSG)
62        }
63    }
64}
65
66/// Apache Parquet columnar format reader and writer.
67#[cfg(feature = "parquet")]
68pub mod parquet;
69#[cfg(not(feature = "parquet"))]
70pub mod parquet {
71    //! Stub module — Parquet feature not enabled.
72    use crate::value::Value;
73
74    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
75
76    #[derive(Debug, Clone, Default)]
77    pub struct ParquetOptions {
78        pub row_group: Option<usize>,
79    }
80    pub struct ParquetReader {
81        _options: ParquetOptions,
82    }
83    impl ParquetReader {
84        pub fn new(options: ParquetOptions) -> Self {
85            Self { _options: options }
86        }
87        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
88            anyhow::bail!(MSG)
89        }
90        #[allow(dead_code)]
91        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
92            anyhow::bail!(MSG)
93        }
94    }
95    #[allow(dead_code)]
96    pub struct ParquetMetadata {
97        pub num_rows: usize,
98        pub num_row_groups: usize,
99        pub columns: Vec<String>,
100        pub column_types: Vec<String>,
101    }
102    #[derive(Debug, Clone, Default)]
103    pub enum ParquetCompression {
104        #[default]
105        None,
106        Snappy,
107        Gzip,
108        Zstd,
109    }
110    impl std::str::FromStr for ParquetCompression {
111        type Err = anyhow::Error;
112        fn from_str(s: &str) -> anyhow::Result<Self> {
113            match s.to_lowercase().as_str() {
114                "none" | "uncompressed" => Ok(Self::None),
115                "snappy" => Ok(Self::Snappy),
116                "gzip" => Ok(Self::Gzip),
117                "zstd" => Ok(Self::Zstd),
118                _ => anyhow::bail!(
119                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
120                    s
121                ),
122            }
123        }
124    }
125    #[derive(Debug, Clone, Default)]
126    pub struct ParquetWriteOptions {
127        pub compression: ParquetCompression,
128        pub row_group_size: Option<usize>,
129    }
130    pub struct ParquetWriter {
131        _options: ParquetWriteOptions,
132    }
133    impl ParquetWriter {
134        pub fn new(options: ParquetWriteOptions) -> Self {
135            Self { _options: options }
136        }
137        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
138            anyhow::bail!(MSG)
139        }
140    }
141    /// Stub for arrow_value_to_value when parquet feature is disabled.
142    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
143        Value::Null
144    }
145}
146
147/// SQLite database reader.
148#[cfg(feature = "sqlite")]
149pub mod sqlite;
150#[cfg(not(feature = "sqlite"))]
151pub mod sqlite {
152    //! Stub module — SQLite feature not enabled.
153    use crate::value::Value;
154    use std::path::Path;
155
156    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
157
158    #[derive(Debug, Clone, Default)]
159    pub struct SqliteOptions {
160        pub table: Option<String>,
161        pub sql: Option<String>,
162    }
163    pub struct SqliteReader {
164        _options: SqliteOptions,
165    }
166    impl SqliteReader {
167        pub fn new(options: SqliteOptions) -> Self {
168            Self { _options: options }
169        }
170        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
171            anyhow::bail!(MSG)
172        }
173        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
174            anyhow::bail!(MSG)
175        }
176    }
177}
178
179/// Excel (XLSX) reader.
180#[cfg(feature = "excel")]
181pub mod xlsx;
182#[cfg(not(feature = "excel"))]
183pub mod xlsx {
184    //! Stub module — Excel feature not enabled.
185    use crate::value::Value;
186
187    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
188
189    #[derive(Debug, Clone, Default)]
190    pub struct XlsxOptions {
191        pub sheet: Option<String>,
192        pub header_row: usize,
193    }
194    pub struct XlsxReader {
195        _options: XlsxOptions,
196    }
197    impl XlsxReader {
198        pub fn new(options: XlsxOptions) -> Self {
199            Self { _options: options }
200        }
201        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
202            anyhow::bail!(MSG)
203        }
204        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
205            anyhow::bail!(MSG)
206        }
207    }
208}
209
210/// XML reader and writer.
211#[cfg(feature = "xml")]
212pub mod xml;
213#[cfg(not(feature = "xml"))]
214pub mod xml {
215    //! Stub module — XML feature not enabled.
216    use super::{FormatReader, FormatWriter};
217    use crate::value::Value;
218    use std::io::{Read, Write};
219
220    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
221
222    #[derive(Default)]
223    pub struct XmlReader {
224        _private: (),
225    }
226    impl XmlReader {
227        #[allow(dead_code)]
228        pub fn new(_strip_namespaces: bool) -> Self {
229            Self { _private: () }
230        }
231    }
232    impl FormatReader for XmlReader {
233        fn read(&self, _: &str) -> anyhow::Result<Value> {
234            anyhow::bail!(MSG)
235        }
236        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
237            anyhow::bail!(MSG)
238        }
239    }
240    pub struct XmlWriter {
241        _private: (),
242    }
243    impl XmlWriter {
244        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
245            Self { _private: () }
246        }
247    }
248    impl FormatWriter for XmlWriter {
249        fn write(&self, _: &Value) -> anyhow::Result<String> {
250            anyhow::bail!(MSG)
251        }
252        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
253            anyhow::bail!(MSG)
254        }
255    }
256}
257
258use std::io::{Read, Write};
259use std::path::Path;
260
261use crate::error::DkitError;
262use crate::value::Value;
263
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The enum is a fieldless `Copy` type, so it also derives `Eq` and `Hash`
/// and can be used directly as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel spreadsheet (`*.xlsx`, read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`)
    Parquet,
    /// Markdown table (write-only)
    Markdown,
    /// HTML table (write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
    /// .env file format (`*.env`, `.env.*`)
    Env,
    /// INI/CFG configuration file format (`*.ini`, `*.cfg`)
    Ini,
    /// Java `.properties` file format (`*.properties`)
    Properties,
}
304
305impl Format {
306    #[allow(clippy::should_implement_trait)]
307    pub fn from_str(s: &str) -> Result<Self, DkitError> {
308        match s.to_lowercase().as_str() {
309            "json" => Ok(Format::Json),
310            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
311            "csv" | "tsv" => Ok(Format::Csv),
312            "yaml" | "yml" => Ok(Format::Yaml),
313            "toml" => Ok(Format::Toml),
314            "xml" => Ok(Format::Xml),
315            "msgpack" | "messagepack" => Ok(Format::Msgpack),
316            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
317            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
318            "parquet" | "pq" => Ok(Format::Parquet),
319            "md" | "markdown" => Ok(Format::Markdown),
320            "html" => Ok(Format::Html),
321            "table" => Ok(Format::Table),
322            "env" | "dotenv" => Ok(Format::Env),
323            "ini" | "cfg" | "conf" | "config" => Ok(Format::Ini),
324            "properties" => Ok(Format::Properties),
325            _ => Err(DkitError::UnknownFormat(s.to_string())),
326        }
327    }
328
329    /// 사용 가능한 출력 포맷 목록을 반환한다
330    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
331        let mut formats = vec![
332            ("json", "JSON format"),
333            ("csv", "Comma-separated values"),
334            ("tsv", "Tab-separated values (CSV variant)"),
335            ("yaml", "YAML format"),
336            ("toml", "TOML format"),
337            ("jsonl", "JSON Lines (one JSON object per line)"),
338        ];
339
340        if cfg!(feature = "xml") {
341            formats.push(("xml", "XML format"));
342        } else {
343            formats.push(("xml", "XML format (requires --features xml)"));
344        }
345        if cfg!(feature = "msgpack") {
346            formats.push(("msgpack", "MessagePack binary format"));
347        } else {
348            formats.push((
349                "msgpack",
350                "MessagePack binary format (requires --features msgpack)",
351            ));
352        }
353        if cfg!(feature = "excel") {
354            formats.push(("xlsx", "Excel spreadsheet (input only)"));
355        } else {
356            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
357        }
358        if cfg!(feature = "sqlite") {
359            formats.push(("sqlite", "SQLite database (input only)"));
360        } else {
361            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
362        }
363        if cfg!(feature = "parquet") {
364            formats.push(("parquet", "Apache Parquet columnar format"));
365        } else {
366            formats.push((
367                "parquet",
368                "Apache Parquet columnar format (requires --features parquet)",
369            ));
370        }
371
372        formats.push(("env", "Environment variables (.env) format"));
373        formats.push(("ini", "INI/CFG configuration file format"));
374        formats.push(("properties", "Java .properties file format"));
375        formats.push(("md", "Markdown table"));
376        formats.push(("html", "HTML table"));
377        formats.push(("table", "Terminal table (default for view)"));
378
379        formats
380    }
381}
382
383impl std::fmt::Display for Format {
384    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
385        match self {
386            Format::Json => write!(f, "JSON"),
387            Format::Jsonl => write!(f, "JSONL"),
388            Format::Csv => write!(f, "CSV"),
389            Format::Yaml => write!(f, "YAML"),
390            Format::Toml => write!(f, "TOML"),
391            Format::Xml => write!(f, "XML"),
392            Format::Msgpack => write!(f, "MessagePack"),
393            Format::Xlsx => write!(f, "Excel"),
394            Format::Sqlite => write!(f, "SQLite"),
395            Format::Parquet => write!(f, "Parquet"),
396            Format::Markdown => write!(f, "Markdown"),
397            Format::Html => write!(f, "HTML"),
398            Format::Table => write!(f, "Table"),
399            Format::Env => write!(f, "ENV"),
400            Format::Ini => write!(f, "INI"),
401            Format::Properties => write!(f, "Properties"),
402        }
403    }
404}
405
406/// 파일 확장자로 포맷을 자동 감지
407pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
408    // .env 파일 감지: .env, .env.local, .env.development 등
409    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
410        if name == ".env" || name.starts_with(".env.") {
411            return Ok(Format::Env);
412        }
413    }
414
415    match path.extension().and_then(|e| e.to_str()) {
416        Some("json") => Ok(Format::Json),
417        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
418        Some("csv" | "tsv") => Ok(Format::Csv),
419        Some("yaml" | "yml") => Ok(Format::Yaml),
420        Some("toml") => Ok(Format::Toml),
421        Some("xml") => Ok(Format::Xml),
422        Some("msgpack") => Ok(Format::Msgpack),
423        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
424        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
425        Some("parquet" | "pq") => Ok(Format::Parquet),
426        Some("md") => Ok(Format::Markdown),
427        Some("html") => Ok(Format::Html),
428        Some("env") => Ok(Format::Env),
429        Some("ini" | "cfg") => Ok(Format::Ini),
430        Some("properties") => Ok(Format::Properties),
431        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
432        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
433    }
434}
435
436/// 콘텐츠 스니핑으로 포맷을 자동 감지
437///
438/// 감지 우선순위:
439/// 1. `<?xml` → XML
440/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
441/// 3. `{` 또는 `[` 시작 → JSON
442/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
443/// 5. TOML 패턴 (키 = 값, [섹션])
444/// 6. YAML 패턴 (키: 값, ---)
445pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
446    let trimmed = content.trim_start();
447
448    if trimmed.is_empty() {
449        return Err(DkitError::FormatDetectionFailed(
450            "input is empty".to_string(),
451        ));
452    }
453
454    // XML: <?xml 또는 루트 태그로 시작
455    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
456        return Ok((Format::Xml, None));
457    }
458
459    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
460    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
461    if let Some(first_line) = lines.next() {
462        if let Some(second_line) = lines.next() {
463            let first_trimmed = first_line.trim();
464            let second_trimmed = second_line.trim();
465            if first_trimmed.starts_with('{')
466                && first_trimmed.ends_with('}')
467                && second_trimmed.starts_with('{')
468                && second_trimmed.ends_with('}')
469            {
470                return Ok((Format::Jsonl, None));
471            }
472        }
473    }
474
475    // JSON: { 로 시작 (단일 객체)
476    if trimmed.starts_with('{') {
477        return Ok((Format::Json, None));
478    }
479
480    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
481    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
482    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
483    if trimmed.starts_with('[') {
484        let first_line = trimmed.lines().next().unwrap_or("").trim();
485        // TOML 섹션 헤더: [section] 또는 [[array]]
486        let is_toml_section = first_line.starts_with("[[")
487            || (first_line.starts_with('[')
488                && first_line.ends_with(']')
489                && !first_line.contains(',')
490                && first_line[1..first_line.len() - 1].chars().all(|c| {
491                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
492                }));
493        if is_toml_section {
494            return Ok((Format::Toml, None));
495        }
496        return Ok((Format::Json, None));
497    }
498
499    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
500    if trimmed.starts_with('<') {
501        return Ok((Format::Xml, None));
502    }
503
504    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
505    if let Some(first_line) = trimmed.lines().next() {
506        if first_line.contains('\t') {
507            return Ok((Format::Csv, Some('\t')));
508        }
509    }
510
511    // ENV: KEY=VALUE 패턴 (대문자 키, = 주변에 공백 없음)
512    // TOML과 구별: TOML은 " = " (공백 포함), ENV는 "KEY=value" (공백 없음, 대문자)
513    let first_line = trimmed.lines().next().unwrap_or("");
514    let ft = first_line.trim();
515    let env_line = ft.strip_prefix("export ").unwrap_or(ft);
516    if let Some(eq_pos) = env_line.find('=') {
517        let key_part = env_line[..eq_pos].trim();
518        if !key_part.is_empty()
519            && !key_part.contains(' ')
520            && key_part
521                .chars()
522                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
523        {
524            // 여러 줄이 모두 ENV 패턴인지 확인
525            let env_lines = trimmed
526                .lines()
527                .filter(|l| {
528                    let t = l.trim();
529                    !t.is_empty() && !t.starts_with('#')
530                })
531                .take(5);
532            let all_env = env_lines.clone().all(|l| {
533                let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
534                if let Some(p) = l.find('=') {
535                    let k = l[..p].trim();
536                    !k.is_empty()
537                        && !k.contains(' ')
538                        && k.chars()
539                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
540                } else {
541                    false
542                }
543            });
544            if all_env {
545                return Ok((Format::Env, None));
546            }
547        }
548    }
549
550    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
551    if ft.contains(" = ") {
552        return Ok((Format::Toml, None));
553    }
554
555    // YAML: --- 또는 key: value 패턴
556    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
557        return Ok((Format::Yaml, None));
558    }
559
560    // CSV: 콤마가 포함된 구조적 데이터
561    if ft.contains(',') {
562        return Ok((Format::Csv, None));
563    }
564
565    Err(DkitError::FormatDetectionFailed(
566        "could not determine format from content".to_string(),
567    ))
568}
569
/// Returns the default delimiter implied by a file's extension.
///
/// `.tsv` files (compared ASCII case-insensitively, matching
/// `default_delimiter_for_format`) use a tab; every other path yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension()?.to_str()?;
    if ext.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
578
/// Returns the default delimiter implied by a `--to` format string.
///
/// `"tsv"` in any ASCII letter case yields a tab; anything else yields `None`.
/// The comparison is done in place, without allocating a lowercase copy.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    if format_str.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
586
/// Per-format knobs that influence how data is parsed or rendered.
///
/// [`Default::default()`] provides the baseline configuration: pretty
/// printing on, every other switch off, and no optional value set.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// CSV delimiter (default: ',')
    pub delimiter: Option<char>,
    /// CSV headerless mode
    pub no_header: bool,
    /// Pretty-print output
    pub pretty: bool,
    /// Compact output (JSON)
    pub compact: bool,
    /// YAML inline/flow style
    pub flow_style: bool,
    /// XML root element name (default: "root")
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output
    pub styled: bool,
    /// Emit a complete HTML document
    pub full_html: bool,
    /// JSON indentation setting (a number: count of spaces, "tab": tab character)
    pub indent: Option<String>,
    /// Sort JSON object keys alphabetically
    pub sort_keys: bool,
}

impl Default for FormatOptions {
    /// Pretty printing is the only switch enabled by default.
    fn default() -> Self {
        FormatOptions {
            // The one flag that starts enabled.
            pretty: true,

            // Flags that start disabled.
            no_header: false,
            compact: false,
            flow_style: false,
            styled: false,
            full_html: false,
            sort_keys: false,

            // Optional values that start unset.
            delimiter: None,
            root_element: None,
            indent: None,
        }
    }
}
630
631/// Trait for reading a data format into a [`Value`].
632///
633/// Implement this trait to add support for reading a new data format.
634#[allow(dead_code)]
635pub trait FormatReader {
636    /// Parse the given string content and return a [`Value`].
637    fn read(&self, input: &str) -> anyhow::Result<Value>;
638
639    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
640    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
641}
642
643/// Trait for writing a [`Value`] to a data format.
644///
645/// Implement this trait to add support for writing a new data format.
646#[allow(dead_code)]
647pub trait FormatWriter {
648    /// Serialize the given [`Value`] and return the formatted string.
649    fn write(&self, value: &Value) -> anyhow::Result<String>;
650
651    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
652    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
653}
654
#[cfg(test)]
mod tests {
    //! Unit tests for format naming, extension detection, content sniffing
    //! and option defaults.
    use super::*;
    use std::path::PathBuf;

    /// Detects the format for a bare file name, panicking on failure.
    fn detect(name: &str) -> Format {
        detect_format(&PathBuf::from(name)).unwrap()
    }

    /// Sniffs `content`, panicking on failure.
    fn sniff(content: &str) -> (Format, Option<char>) {
        detect_format_from_content(content).unwrap()
    }

    // --- Format::from_str ---

    #[test]
    fn test_format_from_str() {
        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
        assert_eq!(Format::from_str("JSON").unwrap(), Format::Json);
        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("TSV").unwrap(), Format::Csv);
        assert_eq!(Format::from_str("yaml").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
        assert_eq!(Format::from_str("toml").unwrap(), Format::Toml);
    }

    #[test]
    fn test_format_from_str_jsonl() {
        for name in ["jsonl", "jsonlines", "ndjson", "JSONL"] {
            assert_eq!(Format::from_str(name).unwrap(), Format::Jsonl, "{name}");
        }
    }

    #[test]
    fn test_format_from_str_xml() {
        assert_eq!(Format::from_str("xml").unwrap(), Format::Xml);
    }

    #[test]
    fn test_format_from_str_msgpack() {
        assert_eq!(Format::from_str("msgpack").unwrap(), Format::Msgpack);
        assert_eq!(Format::from_str("messagepack").unwrap(), Format::Msgpack);
    }

    #[test]
    fn test_format_from_str_markdown() {
        for name in ["md", "markdown", "MD"] {
            assert_eq!(Format::from_str(name).unwrap(), Format::Markdown, "{name}");
        }
    }

    #[test]
    fn test_format_from_str_table() {
        assert_eq!(Format::from_str("table").unwrap(), Format::Table);
        assert_eq!(Format::from_str("TABLE").unwrap(), Format::Table);
    }

    #[test]
    fn test_format_from_str_binary_and_config_aliases() {
        let cases = [
            ("xlsx", Format::Xlsx),
            ("excel", Format::Xlsx),
            ("xls", Format::Xlsx),
            ("sqlite", Format::Sqlite),
            ("sqlite3", Format::Sqlite),
            ("db", Format::Sqlite),
            ("parquet", Format::Parquet),
            ("pq", Format::Parquet),
            ("html", Format::Html),
            ("env", Format::Env),
            ("dotenv", Format::Env),
            ("ini", Format::Ini),
            ("cfg", Format::Ini),
            ("conf", Format::Ini),
            ("config", Format::Ini),
            ("properties", Format::Properties),
        ];
        for (name, expected) in cases {
            assert_eq!(Format::from_str(name).unwrap(), expected, "{name}");
        }
    }

    #[test]
    fn test_format_from_str_unknown() {
        let err = Format::from_str("bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    // --- Format::Display ---

    #[test]
    fn test_format_display() {
        let cases = [
            (Format::Json, "JSON"),
            (Format::Csv, "CSV"),
            (Format::Yaml, "YAML"),
            (Format::Toml, "TOML"),
            (Format::Jsonl, "JSONL"),
            (Format::Xml, "XML"),
            (Format::Msgpack, "MessagePack"),
            (Format::Markdown, "Markdown"),
            (Format::Table, "Table"),
            (Format::Xlsx, "Excel"),
            (Format::Sqlite, "SQLite"),
            (Format::Parquet, "Parquet"),
            (Format::Html, "HTML"),
            (Format::Env, "ENV"),
            (Format::Ini, "INI"),
            (Format::Properties, "Properties"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.to_string(), expected);
        }
    }

    // --- Format::list_output_formats ---

    #[test]
    fn test_list_output_formats() {
        let formats = Format::list_output_formats();
        assert!(formats.len() >= 10);
        assert!(formats.iter().any(|(name, _)| *name == "table"));
        assert!(formats.iter().any(|(name, _)| *name == "json"));
    }

    // --- detect_format ---

    #[test]
    fn test_detect_format_json() {
        assert_eq!(detect("data.json"), Format::Json);
    }

    #[test]
    fn test_detect_format_csv_tsv() {
        assert_eq!(detect("data.csv"), Format::Csv);
        assert_eq!(detect("data.tsv"), Format::Csv);
    }

    #[test]
    fn test_detect_format_yaml() {
        assert_eq!(detect("data.yaml"), Format::Yaml);
        assert_eq!(detect("data.yml"), Format::Yaml);
    }

    #[test]
    fn test_detect_format_toml() {
        assert_eq!(detect("config.toml"), Format::Toml);
    }

    #[test]
    fn test_detect_format_jsonl() {
        assert_eq!(detect("data.jsonl"), Format::Jsonl);
        assert_eq!(detect("data.ndjson"), Format::Jsonl);
    }

    #[test]
    fn test_detect_format_xml() {
        assert_eq!(detect("data.xml"), Format::Xml);
    }

    #[test]
    fn test_detect_format_msgpack() {
        assert_eq!(detect("data.msgpack"), Format::Msgpack);
    }

    #[test]
    fn test_detect_format_markdown() {
        assert_eq!(detect("output.md"), Format::Markdown);
    }

    #[test]
    fn test_detect_format_env_file_names() {
        assert_eq!(detect(".env"), Format::Env);
        assert_eq!(detect(".env.local"), Format::Env);
        assert_eq!(detect("app.env"), Format::Env);
    }

    #[test]
    fn test_detect_format_other_extensions() {
        let cases = [
            ("book.xlsx", Format::Xlsx),
            ("book.ods", Format::Xlsx),
            ("store.db", Format::Sqlite),
            ("store.sqlite3", Format::Sqlite),
            ("cols.parquet", Format::Parquet),
            ("cols.pq", Format::Parquet),
            ("page.html", Format::Html),
            ("settings.ini", Format::Ini),
            ("settings.cfg", Format::Ini),
            ("app.properties", Format::Properties),
        ];
        for (name, expected) in cases {
            assert_eq!(detect(name), expected, "{name}");
        }
    }

    #[test]
    fn test_detect_format_unknown_ext() {
        let err = detect_format(&PathBuf::from("data.bin")).unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    #[test]
    fn test_detect_format_no_extension() {
        let err = detect_format(&PathBuf::from("Makefile")).unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
    }

    // --- default_delimiter ---

    #[test]
    fn test_default_delimiter_tsv() {
        assert_eq!(default_delimiter(&PathBuf::from("data.tsv")), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_csv() {
        assert_eq!(default_delimiter(&PathBuf::from("data.csv")), None);
    }

    #[test]
    fn test_default_delimiter_json() {
        assert_eq!(default_delimiter(&PathBuf::from("data.json")), None);
    }

    #[test]
    fn test_default_delimiter_for_format_tsv() {
        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_for_format_csv() {
        assert_eq!(default_delimiter_for_format("csv"), None);
    }

    // --- FormatOptions ---

    #[test]
    fn test_format_options_default() {
        let opts = FormatOptions::default();
        assert_eq!(opts.delimiter, None);
        assert!(!opts.no_header);
        assert!(opts.pretty);
        assert!(!opts.compact);
        assert!(!opts.flow_style);
        assert_eq!(opts.root_element, None);
        assert!(!opts.styled);
        assert!(!opts.full_html);
        assert_eq!(opts.indent, None);
        assert!(!opts.sort_keys);
    }

    // --- detect_format_from_content ---

    #[test]
    fn test_sniff_xml_declaration() {
        let (fmt, delim) = sniff("<?xml version=\"1.0\"?>\n<root/>");
        assert_eq!(fmt, Format::Xml);
        assert_eq!(delim, None);
    }

    #[test]
    fn test_sniff_xml_tag() {
        let (fmt, _) = sniff("<root><item>hello</item></root>");
        assert_eq!(fmt, Format::Xml);
    }

    #[test]
    fn test_sniff_json_object() {
        let (fmt, _) = sniff("{\"name\": \"Alice\"}");
        assert_eq!(fmt, Format::Json);
    }

    #[test]
    fn test_sniff_json_array() {
        let (fmt, _) = sniff("[1, 2, 3]");
        assert_eq!(fmt, Format::Json);
    }

    #[test]
    fn test_sniff_json_multiline_array() {
        let (fmt, _) = sniff("[\n  1,\n  2\n]");
        assert_eq!(fmt, Format::Json);
    }

    #[test]
    fn test_sniff_jsonl() {
        let (fmt, _) = sniff("{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n");
        assert_eq!(fmt, Format::Jsonl);
    }

    #[test]
    fn test_sniff_tsv() {
        let (fmt, delim) = sniff("name\tage\tcity\nAlice\t30\tSeoul\n");
        assert_eq!(fmt, Format::Csv);
        assert_eq!(delim, Some('\t'));
    }

    #[test]
    fn test_sniff_env() {
        let (fmt, delim) = sniff("DB_HOST=localhost\nexport DB_PORT=5432\n");
        assert_eq!(fmt, Format::Env);
        assert_eq!(delim, None);
    }

    #[test]
    fn test_sniff_toml_section() {
        let (fmt, _) = sniff("[database]\nhost = \"localhost\"\nport = 5432\n");
        assert_eq!(fmt, Format::Toml);
    }

    #[test]
    fn test_sniff_toml_key_value() {
        let (fmt, _) = sniff("title = \"My App\"\nversion = \"1.0\"\n");
        assert_eq!(fmt, Format::Toml);
    }

    #[test]
    fn test_sniff_yaml_document() {
        let (fmt, _) = sniff("---\nname: Alice\nage: 30\n");
        assert_eq!(fmt, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_key_value() {
        let (fmt, _) = sniff("name: Alice\nage: 30\n");
        assert_eq!(fmt, Format::Yaml);
    }

    #[test]
    fn test_sniff_csv() {
        let (fmt, delim) = sniff("name,age,city\nAlice,30,Seoul\n");
        assert_eq!(fmt, Format::Csv);
        assert_eq!(delim, None);
    }

    #[test]
    fn test_sniff_empty_content() {
        let err = detect_format_from_content("").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }

    #[test]
    fn test_sniff_whitespace_only() {
        let err = detect_format_from_content("   \n  \n").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }

    #[test]
    fn test_sniff_unrecognised_content() {
        let err = detect_format_from_content("hello").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }
}
957}