Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// .env file reader and writer.
4pub mod env;
5/// HTML table writer.
6pub mod html;
7/// JSON reader, writer, and value conversion utilities.
8pub mod json;
9/// JSON Lines (NDJSON) reader and writer.
10pub mod jsonl;
11/// Markdown table writer.
12pub mod markdown;
13/// TOML reader and writer.
14pub mod toml;
15/// YAML reader and writer.
16pub mod yaml;
17
18// --- Feature-gated format modules ---
19
20/// MessagePack binary reader and writer.
21#[cfg(feature = "msgpack")]
22pub mod msgpack;
23#[cfg(not(feature = "msgpack"))]
24pub mod msgpack {
25    //! Stub module — MessagePack feature not enabled.
26    use super::{FormatReader, FormatWriter};
27    use crate::value::Value;
28    use std::io::{Read, Write};
29
30    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
31
32    pub struct MsgpackReader;
33    impl MsgpackReader {
34        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
35            anyhow::bail!(MSG)
36        }
37    }
38    impl FormatReader for MsgpackReader {
39        fn read(&self, _: &str) -> anyhow::Result<Value> {
40            anyhow::bail!(MSG)
41        }
42        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
43            anyhow::bail!(MSG)
44        }
45    }
46    pub struct MsgpackWriter;
47    impl MsgpackWriter {
48        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
49            anyhow::bail!(MSG)
50        }
51    }
52    impl FormatWriter for MsgpackWriter {
53        fn write(&self, _: &Value) -> anyhow::Result<String> {
54            anyhow::bail!(MSG)
55        }
56        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
57            anyhow::bail!(MSG)
58        }
59    }
60}
61
62/// Apache Parquet columnar format reader and writer.
63#[cfg(feature = "parquet")]
64pub mod parquet;
65#[cfg(not(feature = "parquet"))]
66pub mod parquet {
67    //! Stub module — Parquet feature not enabled.
68    use crate::value::Value;
69
70    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
71
72    #[derive(Debug, Clone, Default)]
73    pub struct ParquetOptions {
74        pub row_group: Option<usize>,
75    }
76    pub struct ParquetReader {
77        _options: ParquetOptions,
78    }
79    impl ParquetReader {
80        pub fn new(options: ParquetOptions) -> Self {
81            Self { _options: options }
82        }
83        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
84            anyhow::bail!(MSG)
85        }
86        #[allow(dead_code)]
87        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
88            anyhow::bail!(MSG)
89        }
90    }
91    #[allow(dead_code)]
92    pub struct ParquetMetadata {
93        pub num_rows: usize,
94        pub num_row_groups: usize,
95        pub columns: Vec<String>,
96        pub column_types: Vec<String>,
97    }
98    #[derive(Debug, Clone, Default)]
99    pub enum ParquetCompression {
100        #[default]
101        None,
102        Snappy,
103        Gzip,
104        Zstd,
105    }
106    impl std::str::FromStr for ParquetCompression {
107        type Err = anyhow::Error;
108        fn from_str(s: &str) -> anyhow::Result<Self> {
109            match s.to_lowercase().as_str() {
110                "none" | "uncompressed" => Ok(Self::None),
111                "snappy" => Ok(Self::Snappy),
112                "gzip" => Ok(Self::Gzip),
113                "zstd" => Ok(Self::Zstd),
114                _ => anyhow::bail!(
115                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
116                    s
117                ),
118            }
119        }
120    }
121    #[derive(Debug, Clone, Default)]
122    pub struct ParquetWriteOptions {
123        pub compression: ParquetCompression,
124        pub row_group_size: Option<usize>,
125    }
126    pub struct ParquetWriter {
127        _options: ParquetWriteOptions,
128    }
129    impl ParquetWriter {
130        pub fn new(options: ParquetWriteOptions) -> Self {
131            Self { _options: options }
132        }
133        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
134            anyhow::bail!(MSG)
135        }
136    }
137    /// Stub for arrow_value_to_value when parquet feature is disabled.
138    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
139        Value::Null
140    }
141}
142
143/// SQLite database reader.
144#[cfg(feature = "sqlite")]
145pub mod sqlite;
146#[cfg(not(feature = "sqlite"))]
147pub mod sqlite {
148    //! Stub module — SQLite feature not enabled.
149    use crate::value::Value;
150    use std::path::Path;
151
152    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
153
154    #[derive(Debug, Clone, Default)]
155    pub struct SqliteOptions {
156        pub table: Option<String>,
157        pub sql: Option<String>,
158    }
159    pub struct SqliteReader {
160        _options: SqliteOptions,
161    }
162    impl SqliteReader {
163        pub fn new(options: SqliteOptions) -> Self {
164            Self { _options: options }
165        }
166        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
167            anyhow::bail!(MSG)
168        }
169        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
170            anyhow::bail!(MSG)
171        }
172    }
173}
174
175/// Excel (XLSX) reader.
176#[cfg(feature = "excel")]
177pub mod xlsx;
178#[cfg(not(feature = "excel"))]
179pub mod xlsx {
180    //! Stub module — Excel feature not enabled.
181    use crate::value::Value;
182
183    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
184
185    #[derive(Debug, Clone, Default)]
186    pub struct XlsxOptions {
187        pub sheet: Option<String>,
188        pub header_row: usize,
189    }
190    pub struct XlsxReader {
191        _options: XlsxOptions,
192    }
193    impl XlsxReader {
194        pub fn new(options: XlsxOptions) -> Self {
195            Self { _options: options }
196        }
197        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
198            anyhow::bail!(MSG)
199        }
200        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
201            anyhow::bail!(MSG)
202        }
203    }
204}
205
206/// XML reader and writer.
207#[cfg(feature = "xml")]
208pub mod xml;
209#[cfg(not(feature = "xml"))]
210pub mod xml {
211    //! Stub module — XML feature not enabled.
212    use super::{FormatReader, FormatWriter};
213    use crate::value::Value;
214    use std::io::{Read, Write};
215
216    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
217
218    #[derive(Default)]
219    pub struct XmlReader {
220        _private: (),
221    }
222    impl XmlReader {
223        #[allow(dead_code)]
224        pub fn new(_strip_namespaces: bool) -> Self {
225            Self { _private: () }
226        }
227    }
228    impl FormatReader for XmlReader {
229        fn read(&self, _: &str) -> anyhow::Result<Value> {
230            anyhow::bail!(MSG)
231        }
232        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
233            anyhow::bail!(MSG)
234        }
235    }
236    pub struct XmlWriter {
237        _private: (),
238    }
239    impl XmlWriter {
240        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
241            Self { _private: () }
242        }
243    }
244    impl FormatWriter for XmlWriter {
245        fn write(&self, _: &Value) -> anyhow::Result<String> {
246            anyhow::bail!(MSG)
247        }
248        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
249            anyhow::bail!(MSG)
250        }
251    }
252}
253
254use std::io::{Read, Write};
255use std::path::Path;
256
257use crate::error::DkitError;
258use crate::value::Value;
259
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The enum is a plain field-less tag, so besides `PartialEq` it also
/// derives `Eq` and `Hash`; this lets a `Format` be used as a key in
/// `HashMap`/`HashSet` and in other APIs that require total equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel spreadsheet (`*.xlsx`, read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`)
    Parquet,
    /// Markdown table (write-only)
    Markdown,
    /// HTML table (write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
    /// .env file format (`*.env`, `.env.*`)
    Env,
}
296
297impl Format {
298    #[allow(clippy::should_implement_trait)]
299    pub fn from_str(s: &str) -> Result<Self, DkitError> {
300        match s.to_lowercase().as_str() {
301            "json" => Ok(Format::Json),
302            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
303            "csv" | "tsv" => Ok(Format::Csv),
304            "yaml" | "yml" => Ok(Format::Yaml),
305            "toml" => Ok(Format::Toml),
306            "xml" => Ok(Format::Xml),
307            "msgpack" | "messagepack" => Ok(Format::Msgpack),
308            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
309            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
310            "parquet" | "pq" => Ok(Format::Parquet),
311            "md" | "markdown" => Ok(Format::Markdown),
312            "html" => Ok(Format::Html),
313            "table" => Ok(Format::Table),
314            "env" | "dotenv" => Ok(Format::Env),
315            _ => Err(DkitError::UnknownFormat(s.to_string())),
316        }
317    }
318
319    /// 사용 가능한 출력 포맷 목록을 반환한다
320    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
321        let mut formats = vec![
322            ("json", "JSON format"),
323            ("csv", "Comma-separated values"),
324            ("tsv", "Tab-separated values (CSV variant)"),
325            ("yaml", "YAML format"),
326            ("toml", "TOML format"),
327            ("jsonl", "JSON Lines (one JSON object per line)"),
328        ];
329
330        if cfg!(feature = "xml") {
331            formats.push(("xml", "XML format"));
332        } else {
333            formats.push(("xml", "XML format (requires --features xml)"));
334        }
335        if cfg!(feature = "msgpack") {
336            formats.push(("msgpack", "MessagePack binary format"));
337        } else {
338            formats.push((
339                "msgpack",
340                "MessagePack binary format (requires --features msgpack)",
341            ));
342        }
343        if cfg!(feature = "excel") {
344            formats.push(("xlsx", "Excel spreadsheet (input only)"));
345        } else {
346            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
347        }
348        if cfg!(feature = "sqlite") {
349            formats.push(("sqlite", "SQLite database (input only)"));
350        } else {
351            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
352        }
353        if cfg!(feature = "parquet") {
354            formats.push(("parquet", "Apache Parquet columnar format"));
355        } else {
356            formats.push((
357                "parquet",
358                "Apache Parquet columnar format (requires --features parquet)",
359            ));
360        }
361
362        formats.push(("env", "Environment variables (.env) format"));
363        formats.push(("md", "Markdown table"));
364        formats.push(("html", "HTML table"));
365        formats.push(("table", "Terminal table (default for view)"));
366
367        formats
368    }
369}
370
371impl std::fmt::Display for Format {
372    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
373        match self {
374            Format::Json => write!(f, "JSON"),
375            Format::Jsonl => write!(f, "JSONL"),
376            Format::Csv => write!(f, "CSV"),
377            Format::Yaml => write!(f, "YAML"),
378            Format::Toml => write!(f, "TOML"),
379            Format::Xml => write!(f, "XML"),
380            Format::Msgpack => write!(f, "MessagePack"),
381            Format::Xlsx => write!(f, "Excel"),
382            Format::Sqlite => write!(f, "SQLite"),
383            Format::Parquet => write!(f, "Parquet"),
384            Format::Markdown => write!(f, "Markdown"),
385            Format::Html => write!(f, "HTML"),
386            Format::Table => write!(f, "Table"),
387            Format::Env => write!(f, "ENV"),
388        }
389    }
390}
391
392/// 파일 확장자로 포맷을 자동 감지
393pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
394    // .env 파일 감지: .env, .env.local, .env.development 등
395    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
396        if name == ".env" || name.starts_with(".env.") {
397            return Ok(Format::Env);
398        }
399    }
400
401    match path.extension().and_then(|e| e.to_str()) {
402        Some("json") => Ok(Format::Json),
403        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
404        Some("csv" | "tsv") => Ok(Format::Csv),
405        Some("yaml" | "yml") => Ok(Format::Yaml),
406        Some("toml") => Ok(Format::Toml),
407        Some("xml") => Ok(Format::Xml),
408        Some("msgpack") => Ok(Format::Msgpack),
409        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
410        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
411        Some("parquet" | "pq") => Ok(Format::Parquet),
412        Some("md") => Ok(Format::Markdown),
413        Some("html") => Ok(Format::Html),
414        Some("env") => Ok(Format::Env),
415        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
416        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
417    }
418}
419
420/// 콘텐츠 스니핑으로 포맷을 자동 감지
421///
422/// 감지 우선순위:
423/// 1. `<?xml` → XML
424/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
425/// 3. `{` 또는 `[` 시작 → JSON
426/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
427/// 5. TOML 패턴 (키 = 값, [섹션])
428/// 6. YAML 패턴 (키: 값, ---)
429pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
430    let trimmed = content.trim_start();
431
432    if trimmed.is_empty() {
433        return Err(DkitError::FormatDetectionFailed(
434            "input is empty".to_string(),
435        ));
436    }
437
438    // XML: <?xml 또는 루트 태그로 시작
439    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
440        return Ok((Format::Xml, None));
441    }
442
443    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
444    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
445    if let Some(first_line) = lines.next() {
446        if let Some(second_line) = lines.next() {
447            let first_trimmed = first_line.trim();
448            let second_trimmed = second_line.trim();
449            if first_trimmed.starts_with('{')
450                && first_trimmed.ends_with('}')
451                && second_trimmed.starts_with('{')
452                && second_trimmed.ends_with('}')
453            {
454                return Ok((Format::Jsonl, None));
455            }
456        }
457    }
458
459    // JSON: { 로 시작 (단일 객체)
460    if trimmed.starts_with('{') {
461        return Ok((Format::Json, None));
462    }
463
464    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
465    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
466    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
467    if trimmed.starts_with('[') {
468        let first_line = trimmed.lines().next().unwrap_or("").trim();
469        // TOML 섹션 헤더: [section] 또는 [[array]]
470        let is_toml_section = first_line.starts_with("[[")
471            || (first_line.starts_with('[')
472                && first_line.ends_with(']')
473                && !first_line.contains(',')
474                && first_line[1..first_line.len() - 1].chars().all(|c| {
475                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
476                }));
477        if is_toml_section {
478            return Ok((Format::Toml, None));
479        }
480        return Ok((Format::Json, None));
481    }
482
483    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
484    if trimmed.starts_with('<') {
485        return Ok((Format::Xml, None));
486    }
487
488    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
489    if let Some(first_line) = trimmed.lines().next() {
490        if first_line.contains('\t') {
491            return Ok((Format::Csv, Some('\t')));
492        }
493    }
494
495    // ENV: KEY=VALUE 패턴 (대문자 키, = 주변에 공백 없음)
496    // TOML과 구별: TOML은 " = " (공백 포함), ENV는 "KEY=value" (공백 없음, 대문자)
497    let first_line = trimmed.lines().next().unwrap_or("");
498    let ft = first_line.trim();
499    let env_line = ft.strip_prefix("export ").unwrap_or(ft);
500    if let Some(eq_pos) = env_line.find('=') {
501        let key_part = env_line[..eq_pos].trim();
502        if !key_part.is_empty()
503            && !key_part.contains(' ')
504            && key_part
505                .chars()
506                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
507        {
508            // 여러 줄이 모두 ENV 패턴인지 확인
509            let env_lines = trimmed
510                .lines()
511                .filter(|l| {
512                    let t = l.trim();
513                    !t.is_empty() && !t.starts_with('#')
514                })
515                .take(5);
516            let all_env = env_lines.clone().all(|l| {
517                let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
518                if let Some(p) = l.find('=') {
519                    let k = l[..p].trim();
520                    !k.is_empty()
521                        && !k.contains(' ')
522                        && k.chars()
523                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
524                } else {
525                    false
526                }
527            });
528            if all_env {
529                return Ok((Format::Env, None));
530            }
531        }
532    }
533
534    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
535    if ft.contains(" = ") {
536        return Ok((Format::Toml, None));
537    }
538
539    // YAML: --- 또는 key: value 패턴
540    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
541        return Ok((Format::Yaml, None));
542    }
543
544    // CSV: 콤마가 포함된 구조적 데이터
545    if ft.contains(',') {
546        return Ok((Format::Csv, None));
547    }
548
549    Err(DkitError::FormatDetectionFailed(
550        "could not determine format from content".to_string(),
551    ))
552}
553
/// Returns the default delimiter implied by a file's extension.
///
/// A `tsv` extension selects the tab character; the comparison ignores
/// ASCII case, so `data.TSV` behaves like `data.tsv` (consistent with
/// `default_delimiter_for_format`, which is also case-insensitive). Any
/// other extension, or no extension, yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    path.extension()
        .and_then(|ext| ext.to_str())
        .filter(|ext| ext.eq_ignore_ascii_case("tsv"))
        .map(|_| '\t')
}
562
/// Returns the default delimiter implied by a `--to` format string.
///
/// Only `tsv` (in any letter case) maps to a delimiter, the tab character;
/// every other string yields `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    let is_tsv = format_str.to_lowercase() == "tsv";
    if is_tsv {
        Some('\t')
    } else {
        None
    }
}
570
/// Format-specific options controlling how data is read or written.
///
/// Use [`Default::default()`] to get sensible defaults.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// CSV delimiter (default: ','). The `Default` impl leaves this as
    /// `None`; presumably the CSV reader/writer then falls back to a comma —
    /// confirm in the CSV module.
    pub delimiter: Option<char>,
    /// CSV no-header mode.
    pub no_header: bool,
    /// Pretty-printed output.
    pub pretty: bool,
    /// Compact output (JSON).
    pub compact: bool,
    /// YAML inline/flow style.
    pub flow_style: bool,
    /// XML root element name (default: "root").
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output.
    pub styled: bool,
    /// Emit a complete HTML document.
    pub full_html: bool,
}
593
594impl Default for FormatOptions {
595    fn default() -> Self {
596        Self {
597            delimiter: None,
598            no_header: false,
599            pretty: true,
600            compact: false,
601            flow_style: false,
602            root_element: None,
603            styled: false,
604            full_html: false,
605        }
606    }
607}
608
/// Trait for reading a data format into a [`Value`].
///
/// Implement this trait to add support for reading a new data format.
///
/// Because `read_from_reader` is generic over its reader (`impl Read`), the
/// trait cannot be used as a trait object (`dyn FormatReader`); callers use
/// concrete reader types instead.
///
/// Both methods return `anyhow::Result`, so the error type is left to the
/// implementation. The feature-disabled stub readers in this module always
/// return an error explaining which Cargo feature is required.
#[allow(dead_code)]
pub trait FormatReader {
    /// Parse the given string content and return a [`Value`].
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
620
/// Trait for writing a [`Value`] to a data format.
///
/// Implement this trait to add support for writing a new data format.
///
/// Because `write_to_writer` is generic over its destination
/// (`impl Write`), the trait cannot be used as a trait object
/// (`dyn FormatWriter`); callers use concrete writer types instead.
///
/// Both methods return `anyhow::Result`, so the error type is left to the
/// implementation. The feature-disabled stub writers in this module always
/// return an error explaining which Cargo feature is required.
#[allow(dead_code)]
pub trait FormatWriter {
    /// Serialize the given [`Value`] and return the formatted string.
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
632
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Parses a format name, panicking if it is rejected.
    fn parse(name: &str) -> Format {
        Format::from_str(name).unwrap()
    }

    /// Runs extension-based detection on a path literal.
    fn by_path(path: &str) -> Result<Format, DkitError> {
        detect_format(&PathBuf::from(path))
    }

    /// Runs content sniffing, panicking if detection fails.
    fn sniff(content: &str) -> (Format, Option<char>) {
        detect_format_from_content(content).unwrap()
    }

    // --- Format::from_str ---

    #[test]
    fn test_format_from_str() {
        let cases = [
            ("json", Format::Json),
            ("JSON", Format::Json),
            ("csv", Format::Csv),
            ("tsv", Format::Csv),
            ("TSV", Format::Csv),
            ("yaml", Format::Yaml),
            ("yml", Format::Yaml),
            ("toml", Format::Toml),
        ];
        for &(name, expected) in cases.iter() {
            assert_eq!(parse(name), expected);
        }
    }

    #[test]
    fn test_format_from_str_jsonl() {
        for &name in ["jsonl", "jsonlines", "ndjson", "JSONL"].iter() {
            assert_eq!(parse(name), Format::Jsonl);
        }
    }

    #[test]
    fn test_format_from_str_xml() {
        assert_eq!(parse("xml"), Format::Xml);
    }

    #[test]
    fn test_format_from_str_msgpack() {
        for &name in ["msgpack", "messagepack"].iter() {
            assert_eq!(parse(name), Format::Msgpack);
        }
    }

    #[test]
    fn test_format_from_str_markdown() {
        for &name in ["md", "markdown", "MD"].iter() {
            assert_eq!(parse(name), Format::Markdown);
        }
    }

    #[test]
    fn test_format_from_str_unknown() {
        let err = Format::from_str("bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    // --- Format::Display ---

    #[test]
    fn test_format_display() {
        let cases = [
            (Format::Json, "JSON"),
            (Format::Csv, "CSV"),
            (Format::Yaml, "YAML"),
            (Format::Toml, "TOML"),
            (Format::Jsonl, "JSONL"),
            (Format::Xml, "XML"),
            (Format::Msgpack, "MessagePack"),
            (Format::Markdown, "Markdown"),
            (Format::Table, "Table"),
        ];
        for &(format, label) in cases.iter() {
            assert_eq!(format.to_string(), label);
        }
    }

    #[test]
    fn test_format_from_str_table() {
        for &name in ["table", "TABLE"].iter() {
            assert_eq!(parse(name), Format::Table);
        }
    }

    #[test]
    fn test_list_output_formats() {
        let formats = Format::list_output_formats();
        let has = |wanted: &str| formats.iter().any(|(name, _)| *name == wanted);
        assert!(formats.len() >= 10);
        assert!(has("table"));
        assert!(has("json"));
    }

    // --- detect_format ---

    #[test]
    fn test_detect_format_json() {
        assert_eq!(by_path("data.json").unwrap(), Format::Json);
    }

    #[test]
    fn test_detect_format_csv_tsv() {
        for &path in ["data.csv", "data.tsv"].iter() {
            assert_eq!(by_path(path).unwrap(), Format::Csv);
        }
    }

    #[test]
    fn test_detect_format_yaml() {
        for &path in ["data.yaml", "data.yml"].iter() {
            assert_eq!(by_path(path).unwrap(), Format::Yaml);
        }
    }

    #[test]
    fn test_detect_format_toml() {
        assert_eq!(by_path("config.toml").unwrap(), Format::Toml);
    }

    #[test]
    fn test_detect_format_jsonl() {
        for &path in ["data.jsonl", "data.ndjson"].iter() {
            assert_eq!(by_path(path).unwrap(), Format::Jsonl);
        }
    }

    #[test]
    fn test_detect_format_xml() {
        assert_eq!(by_path("data.xml").unwrap(), Format::Xml);
    }

    #[test]
    fn test_detect_format_msgpack() {
        assert_eq!(by_path("data.msgpack").unwrap(), Format::Msgpack);
    }

    #[test]
    fn test_detect_format_markdown() {
        assert_eq!(by_path("output.md").unwrap(), Format::Markdown);
    }

    #[test]
    fn test_detect_format_unknown_ext() {
        let err = by_path("data.bin").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
    }

    #[test]
    fn test_detect_format_no_extension() {
        let err = by_path("Makefile").unwrap_err();
        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
    }

    // --- default_delimiter ---

    #[test]
    fn test_default_delimiter_tsv() {
        let path = PathBuf::from("data.tsv");
        assert_eq!(default_delimiter(&path), Some('\t'));
    }

    #[test]
    fn test_default_delimiter_csv() {
        let path = PathBuf::from("data.csv");
        assert_eq!(default_delimiter(&path), None);
    }

    #[test]
    fn test_default_delimiter_json() {
        let path = PathBuf::from("data.json");
        assert_eq!(default_delimiter(&path), None);
    }

    #[test]
    fn test_default_delimiter_for_format_tsv() {
        for &name in ["tsv", "TSV"].iter() {
            assert_eq!(default_delimiter_for_format(name), Some('\t'));
        }
    }

    #[test]
    fn test_default_delimiter_for_format_csv() {
        assert_eq!(default_delimiter_for_format("csv"), None);
    }

    // --- FormatOptions ---

    #[test]
    fn test_format_options_default() {
        let FormatOptions {
            delimiter,
            no_header,
            pretty,
            compact,
            flow_style,
            root_element,
            ..
        } = FormatOptions::default();
        assert_eq!(delimiter, None);
        assert!(!no_header);
        assert!(pretty);
        assert!(!compact);
        assert!(!flow_style);
        assert_eq!(root_element, None);
    }

    // --- detect_format_from_content ---

    #[test]
    fn test_sniff_xml_declaration() {
        let detected = sniff("<?xml version=\"1.0\"?>\n<root/>");
        assert_eq!(detected, (Format::Xml, None));
    }

    #[test]
    fn test_sniff_xml_tag() {
        assert_eq!(sniff("<root><item>hello</item></root>").0, Format::Xml);
    }

    #[test]
    fn test_sniff_json_object() {
        assert_eq!(sniff("{\"name\": \"Alice\"}").0, Format::Json);
    }

    #[test]
    fn test_sniff_json_array() {
        assert_eq!(sniff("[1, 2, 3]").0, Format::Json);
    }

    #[test]
    fn test_sniff_jsonl() {
        let content = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
        assert_eq!(sniff(content).0, Format::Jsonl);
    }

    #[test]
    fn test_sniff_tsv() {
        let content = "name\tage\tcity\nAlice\t30\tSeoul\n";
        assert_eq!(sniff(content), (Format::Csv, Some('\t')));
    }

    #[test]
    fn test_sniff_toml_section() {
        let content = "[database]\nhost = \"localhost\"\nport = 5432\n";
        assert_eq!(sniff(content).0, Format::Toml);
    }

    #[test]
    fn test_sniff_toml_key_value() {
        let content = "title = \"My App\"\nversion = \"1.0\"\n";
        assert_eq!(sniff(content).0, Format::Toml);
    }

    #[test]
    fn test_sniff_yaml_document() {
        let content = "---\nname: Alice\nage: 30\n";
        assert_eq!(sniff(content).0, Format::Yaml);
    }

    #[test]
    fn test_sniff_yaml_key_value() {
        let content = "name: Alice\nage: 30\n";
        assert_eq!(sniff(content).0, Format::Yaml);
    }

    #[test]
    fn test_sniff_csv() {
        let content = "name,age,city\nAlice,30,Seoul\n";
        assert_eq!(sniff(content), (Format::Csv, None));
    }

    #[test]
    fn test_sniff_empty_content() {
        let err = detect_format_from_content("").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }

    #[test]
    fn test_sniff_whitespace_only() {
        let err = detect_format_from_content("   \n  \n").unwrap_err();
        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
    }
}