Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// .env file reader and writer.
4pub mod env;
5/// HTML table writer.
6pub mod html;
7/// INI/CFG configuration file reader and writer.
8pub mod ini;
9/// JSON reader, writer, and value conversion utilities.
10pub mod json;
11/// JSON Lines (NDJSON) reader and writer.
12pub mod jsonl;
13/// Markdown table writer.
14pub mod markdown;
15/// Java `.properties` file reader and writer.
16pub mod properties;
17/// TOML reader and writer.
18pub mod toml;
19/// YAML reader and writer.
20pub mod yaml;
21
22// --- Feature-gated format modules ---
23
24/// MessagePack binary reader and writer.
25#[cfg(feature = "msgpack")]
26pub mod msgpack;
27#[cfg(not(feature = "msgpack"))]
28pub mod msgpack {
29    //! Stub module — MessagePack feature not enabled.
30    use super::{FormatReader, FormatWriter};
31    use crate::value::Value;
32    use std::io::{Read, Write};
33
34    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
35
36    pub struct MsgpackReader;
37    impl MsgpackReader {
38        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
39            anyhow::bail!(MSG)
40        }
41    }
42    impl FormatReader for MsgpackReader {
43        fn read(&self, _: &str) -> anyhow::Result<Value> {
44            anyhow::bail!(MSG)
45        }
46        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
47            anyhow::bail!(MSG)
48        }
49    }
50    pub struct MsgpackWriter;
51    impl MsgpackWriter {
52        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
53            anyhow::bail!(MSG)
54        }
55    }
56    impl FormatWriter for MsgpackWriter {
57        fn write(&self, _: &Value) -> anyhow::Result<String> {
58            anyhow::bail!(MSG)
59        }
60        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
61            anyhow::bail!(MSG)
62        }
63    }
64}
65
66/// Apache Parquet columnar format reader and writer.
67#[cfg(feature = "parquet")]
68pub mod parquet;
69#[cfg(not(feature = "parquet"))]
70pub mod parquet {
71    //! Stub module — Parquet feature not enabled.
72    use crate::value::Value;
73
74    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
75
76    #[derive(Debug, Clone, Default)]
77    pub struct ParquetOptions {
78        pub row_group: Option<usize>,
79    }
80    pub struct ParquetReader {
81        _options: ParquetOptions,
82    }
83    impl ParquetReader {
84        pub fn new(options: ParquetOptions) -> Self {
85            Self { _options: options }
86        }
87        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
88            anyhow::bail!(MSG)
89        }
90        #[allow(dead_code)]
91        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
92            anyhow::bail!(MSG)
93        }
94    }
95    #[allow(dead_code)]
96    pub struct ParquetMetadata {
97        pub num_rows: usize,
98        pub num_row_groups: usize,
99        pub columns: Vec<String>,
100        pub column_types: Vec<String>,
101    }
102    #[derive(Debug, Clone, Default)]
103    pub enum ParquetCompression {
104        #[default]
105        None,
106        Snappy,
107        Gzip,
108        Zstd,
109    }
110    impl std::str::FromStr for ParquetCompression {
111        type Err = anyhow::Error;
112        fn from_str(s: &str) -> anyhow::Result<Self> {
113            match s.to_lowercase().as_str() {
114                "none" | "uncompressed" => Ok(Self::None),
115                "snappy" => Ok(Self::Snappy),
116                "gzip" => Ok(Self::Gzip),
117                "zstd" => Ok(Self::Zstd),
118                _ => anyhow::bail!(
119                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
120                    s
121                ),
122            }
123        }
124    }
125    #[derive(Debug, Clone, Default)]
126    pub struct ParquetWriteOptions {
127        pub compression: ParquetCompression,
128        pub row_group_size: Option<usize>,
129    }
130    pub struct ParquetWriter {
131        _options: ParquetWriteOptions,
132    }
133    impl ParquetWriter {
134        pub fn new(options: ParquetWriteOptions) -> Self {
135            Self { _options: options }
136        }
137        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
138            anyhow::bail!(MSG)
139        }
140    }
141    /// Stub for arrow_value_to_value when parquet feature is disabled.
142    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
143        Value::Null
144    }
145}
146
147/// SQLite database reader.
148#[cfg(feature = "sqlite")]
149pub mod sqlite;
150#[cfg(not(feature = "sqlite"))]
151pub mod sqlite {
152    //! Stub module — SQLite feature not enabled.
153    use crate::value::Value;
154    use std::path::Path;
155
156    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
157
158    #[derive(Debug, Clone, Default)]
159    pub struct SqliteOptions {
160        pub table: Option<String>,
161        pub sql: Option<String>,
162    }
163    pub struct SqliteReader {
164        _options: SqliteOptions,
165    }
166    impl SqliteReader {
167        pub fn new(options: SqliteOptions) -> Self {
168            Self { _options: options }
169        }
170        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
171            anyhow::bail!(MSG)
172        }
173        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
174            anyhow::bail!(MSG)
175        }
176    }
177}
178
179/// Excel (XLSX) reader.
180#[cfg(feature = "excel")]
181pub mod xlsx;
182#[cfg(not(feature = "excel"))]
183pub mod xlsx {
184    //! Stub module — Excel feature not enabled.
185    use crate::value::Value;
186
187    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
188
189    #[derive(Debug, Clone, Default)]
190    pub struct XlsxOptions {
191        pub sheet: Option<String>,
192        pub header_row: usize,
193    }
194    pub struct XlsxReader {
195        _options: XlsxOptions,
196    }
197    impl XlsxReader {
198        pub fn new(options: XlsxOptions) -> Self {
199            Self { _options: options }
200        }
201        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
202            anyhow::bail!(MSG)
203        }
204        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
205            anyhow::bail!(MSG)
206        }
207    }
208}
209
210/// HCL (HashiCorp Configuration Language) reader and writer.
211#[cfg(feature = "hcl")]
212pub mod hcl;
213#[cfg(not(feature = "hcl"))]
214pub mod hcl {
215    //! Stub module — HCL feature not enabled.
216    use super::{FormatReader, FormatWriter};
217    use crate::value::Value;
218    use std::io::{Read, Write};
219
220    const MSG: &str = "HCL support requires the 'hcl' feature.\n  Install with: cargo install dkit --features hcl";
221
222    pub struct HclReader;
223    impl FormatReader for HclReader {
224        fn read(&self, _: &str) -> anyhow::Result<Value> {
225            anyhow::bail!(MSG)
226        }
227        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
228            anyhow::bail!(MSG)
229        }
230    }
231    pub struct HclWriter;
232    impl FormatWriter for HclWriter {
233        fn write(&self, _: &Value) -> anyhow::Result<String> {
234            anyhow::bail!(MSG)
235        }
236        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
237            anyhow::bail!(MSG)
238        }
239    }
240}
241
242/// macOS Property List (plist) reader and writer.
243#[cfg(feature = "plist")]
244pub mod plist;
245#[cfg(not(feature = "plist"))]
246pub mod plist {
247    //! Stub module — plist feature not enabled.
248    use super::{FormatReader, FormatWriter};
249    use crate::value::Value;
250    use std::io::{Read, Write};
251
252    const MSG: &str = "Plist support requires the 'plist' feature.\n  Install with: cargo install dkit --features plist";
253
254    pub struct PlistReader;
255    impl FormatReader for PlistReader {
256        fn read(&self, _: &str) -> anyhow::Result<Value> {
257            anyhow::bail!(MSG)
258        }
259        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
260            anyhow::bail!(MSG)
261        }
262    }
263    pub struct PlistWriter;
264    impl FormatWriter for PlistWriter {
265        fn write(&self, _: &Value) -> anyhow::Result<String> {
266            anyhow::bail!(MSG)
267        }
268        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
269            anyhow::bail!(MSG)
270        }
271    }
272}
273
274/// XML reader and writer.
275#[cfg(feature = "xml")]
276pub mod xml;
277#[cfg(not(feature = "xml"))]
278pub mod xml {
279    //! Stub module — XML feature not enabled.
280    use super::{FormatReader, FormatWriter};
281    use crate::value::Value;
282    use std::io::{Read, Write};
283
284    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
285
286    #[derive(Default)]
287    pub struct XmlReader {
288        _private: (),
289    }
290    impl XmlReader {
291        #[allow(dead_code)]
292        pub fn new(_strip_namespaces: bool) -> Self {
293            Self { _private: () }
294        }
295    }
296    impl FormatReader for XmlReader {
297        fn read(&self, _: &str) -> anyhow::Result<Value> {
298            anyhow::bail!(MSG)
299        }
300        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
301            anyhow::bail!(MSG)
302        }
303    }
304    pub struct XmlWriter {
305        _private: (),
306    }
307    impl XmlWriter {
308        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
309            Self { _private: () }
310        }
311    }
312    impl FormatWriter for XmlWriter {
313        fn write(&self, _: &Value) -> anyhow::Result<String> {
314            anyhow::bail!(MSG)
315        }
316        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
317            anyhow::bail!(MSG)
318        }
319    }
320}
321
322use std::io::{Read, Write};
323use std::path::Path;
324
325use crate::error::DkitError;
326use crate::value::Value;
327
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The extensions listed per variant are the ones `detect_format` maps to it.
/// The type is `Eq + Hash`, so it can be used as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel-style spreadsheet (`*.xlsx`, `*.xls`, `*.xlsm`, `*.xlsb`, `*.ods`; read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, `*.sqlite3`, `*.db`; read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`, `*.pq`)
    Parquet,
    /// Markdown table (`*.md`; write-only)
    Markdown,
    /// HTML table (`*.html`; write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
    /// .env file format (`*.env`, `.env`, `.env.*`)
    Env,
    /// INI/CFG configuration file format (`*.ini`, `*.cfg`)
    Ini,
    /// Java `.properties` file format (`*.properties`)
    Properties,
    /// HCL (HashiCorp Configuration Language) (`*.hcl`, `*.tf`, `*.tfvars`)
    Hcl,
    /// macOS Property List (`*.plist`)
    Plist,
}
372
373impl Format {
374    #[allow(clippy::should_implement_trait)]
375    pub fn from_str(s: &str) -> Result<Self, DkitError> {
376        match s.to_lowercase().as_str() {
377            "json" => Ok(Format::Json),
378            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
379            "csv" | "tsv" => Ok(Format::Csv),
380            "yaml" | "yml" => Ok(Format::Yaml),
381            "toml" => Ok(Format::Toml),
382            "xml" => Ok(Format::Xml),
383            "msgpack" | "messagepack" => Ok(Format::Msgpack),
384            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
385            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
386            "parquet" | "pq" => Ok(Format::Parquet),
387            "md" | "markdown" => Ok(Format::Markdown),
388            "html" => Ok(Format::Html),
389            "table" => Ok(Format::Table),
390            "env" | "dotenv" => Ok(Format::Env),
391            "ini" | "cfg" | "conf" | "config" => Ok(Format::Ini),
392            "properties" => Ok(Format::Properties),
393            "hcl" | "tf" | "tfvars" => Ok(Format::Hcl),
394            "plist" => Ok(Format::Plist),
395            _ => Err(DkitError::UnknownFormat(s.to_string())),
396        }
397    }
398
399    /// 사용 가능한 출력 포맷 목록을 반환한다
400    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
401        let mut formats = vec![
402            ("json", "JSON format"),
403            ("csv", "Comma-separated values"),
404            ("tsv", "Tab-separated values (CSV variant)"),
405            ("yaml", "YAML format"),
406            ("toml", "TOML format"),
407            ("jsonl", "JSON Lines (one JSON object per line)"),
408        ];
409
410        if cfg!(feature = "xml") {
411            formats.push(("xml", "XML format"));
412        } else {
413            formats.push(("xml", "XML format (requires --features xml)"));
414        }
415        if cfg!(feature = "msgpack") {
416            formats.push(("msgpack", "MessagePack binary format"));
417        } else {
418            formats.push((
419                "msgpack",
420                "MessagePack binary format (requires --features msgpack)",
421            ));
422        }
423        if cfg!(feature = "excel") {
424            formats.push(("xlsx", "Excel spreadsheet (input only)"));
425        } else {
426            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
427        }
428        if cfg!(feature = "sqlite") {
429            formats.push(("sqlite", "SQLite database (input only)"));
430        } else {
431            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
432        }
433        if cfg!(feature = "parquet") {
434            formats.push(("parquet", "Apache Parquet columnar format"));
435        } else {
436            formats.push((
437                "parquet",
438                "Apache Parquet columnar format (requires --features parquet)",
439            ));
440        }
441
442        if cfg!(feature = "hcl") {
443            formats.push(("hcl", "HCL (HashiCorp Configuration Language)"));
444        } else {
445            formats.push((
446                "hcl",
447                "HCL (HashiCorp Configuration Language) (requires --features hcl)",
448            ));
449        }
450
451        if cfg!(feature = "plist") {
452            formats.push(("plist", "macOS Property List format"));
453        } else {
454            formats.push((
455                "plist",
456                "macOS Property List format (requires --features plist)",
457            ));
458        }
459
460        formats.push(("env", "Environment variables (.env) format"));
461        formats.push(("ini", "INI/CFG configuration file format"));
462        formats.push(("properties", "Java .properties file format"));
463        formats.push(("md", "Markdown table"));
464        formats.push(("html", "HTML table"));
465        formats.push(("table", "Terminal table (default for view)"));
466
467        formats
468    }
469}
470
471impl std::fmt::Display for Format {
472    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
473        match self {
474            Format::Json => write!(f, "JSON"),
475            Format::Jsonl => write!(f, "JSONL"),
476            Format::Csv => write!(f, "CSV"),
477            Format::Yaml => write!(f, "YAML"),
478            Format::Toml => write!(f, "TOML"),
479            Format::Xml => write!(f, "XML"),
480            Format::Msgpack => write!(f, "MessagePack"),
481            Format::Xlsx => write!(f, "Excel"),
482            Format::Sqlite => write!(f, "SQLite"),
483            Format::Parquet => write!(f, "Parquet"),
484            Format::Markdown => write!(f, "Markdown"),
485            Format::Html => write!(f, "HTML"),
486            Format::Table => write!(f, "Table"),
487            Format::Env => write!(f, "ENV"),
488            Format::Ini => write!(f, "INI"),
489            Format::Properties => write!(f, "Properties"),
490            Format::Hcl => write!(f, "HCL"),
491            Format::Plist => write!(f, "Plist"),
492        }
493    }
494}
495
496/// 파일 확장자로 포맷을 자동 감지
497pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
498    // .env 파일 감지: .env, .env.local, .env.development 등
499    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
500        if name == ".env" || name.starts_with(".env.") {
501            return Ok(Format::Env);
502        }
503    }
504
505    match path.extension().and_then(|e| e.to_str()) {
506        Some("json") => Ok(Format::Json),
507        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
508        Some("csv" | "tsv") => Ok(Format::Csv),
509        Some("yaml" | "yml") => Ok(Format::Yaml),
510        Some("toml") => Ok(Format::Toml),
511        Some("xml") => Ok(Format::Xml),
512        Some("msgpack") => Ok(Format::Msgpack),
513        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
514        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
515        Some("parquet" | "pq") => Ok(Format::Parquet),
516        Some("md") => Ok(Format::Markdown),
517        Some("html") => Ok(Format::Html),
518        Some("env") => Ok(Format::Env),
519        Some("ini" | "cfg") => Ok(Format::Ini),
520        Some("properties") => Ok(Format::Properties),
521        Some("hcl" | "tf" | "tfvars") => Ok(Format::Hcl),
522        Some("plist") => Ok(Format::Plist),
523        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
524        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
525    }
526}
527
528/// 콘텐츠 스니핑으로 포맷을 자동 감지
529///
530/// 감지 우선순위:
531/// 1. `<?xml` → XML
532/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
533/// 3. `{` 또는 `[` 시작 → JSON
534/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
535/// 5. TOML 패턴 (키 = 값, [섹션])
536/// 6. YAML 패턴 (키: 값, ---)
537pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
538    let trimmed = content.trim_start();
539
540    if trimmed.is_empty() {
541        return Err(DkitError::FormatDetectionFailed(
542            "input is empty".to_string(),
543        ));
544    }
545
546    // Plist: <?xml followed by <!DOCTYPE plist or <plist
547    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
548        if trimmed.contains("<!DOCTYPE plist") || trimmed.contains("<plist") {
549            return Ok((Format::Plist, None));
550        }
551        return Ok((Format::Xml, None));
552    }
553
554    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
555    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
556    if let Some(first_line) = lines.next() {
557        if let Some(second_line) = lines.next() {
558            let first_trimmed = first_line.trim();
559            let second_trimmed = second_line.trim();
560            if first_trimmed.starts_with('{')
561                && first_trimmed.ends_with('}')
562                && second_trimmed.starts_with('{')
563                && second_trimmed.ends_with('}')
564            {
565                return Ok((Format::Jsonl, None));
566            }
567        }
568    }
569
570    // JSON: { 로 시작 (단일 객체)
571    if trimmed.starts_with('{') {
572        return Ok((Format::Json, None));
573    }
574
575    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
576    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
577    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
578    if trimmed.starts_with('[') {
579        let first_line = trimmed.lines().next().unwrap_or("").trim();
580        // TOML 섹션 헤더: [section] 또는 [[array]]
581        let is_toml_section = first_line.starts_with("[[")
582            || (first_line.starts_with('[')
583                && first_line.ends_with(']')
584                && !first_line.contains(',')
585                && first_line[1..first_line.len() - 1].chars().all(|c| {
586                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
587                }));
588        if is_toml_section {
589            return Ok((Format::Toml, None));
590        }
591        return Ok((Format::Json, None));
592    }
593
594    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
595    if trimmed.starts_with('<') {
596        return Ok((Format::Xml, None));
597    }
598
599    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
600    if let Some(first_line) = trimmed.lines().next() {
601        if first_line.contains('\t') {
602            return Ok((Format::Csv, Some('\t')));
603        }
604    }
605
606    // ENV: KEY=VALUE 패턴 (대문자 키, = 주변에 공백 없음)
607    // TOML과 구별: TOML은 " = " (공백 포함), ENV는 "KEY=value" (공백 없음, 대문자)
608    let first_line = trimmed.lines().next().unwrap_or("");
609    let ft = first_line.trim();
610    let env_line = ft.strip_prefix("export ").unwrap_or(ft);
611    if let Some(eq_pos) = env_line.find('=') {
612        let key_part = env_line[..eq_pos].trim();
613        if !key_part.is_empty()
614            && !key_part.contains(' ')
615            && key_part
616                .chars()
617                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
618        {
619            // 여러 줄이 모두 ENV 패턴인지 확인
620            let env_lines = trimmed
621                .lines()
622                .filter(|l| {
623                    let t = l.trim();
624                    !t.is_empty() && !t.starts_with('#')
625                })
626                .take(5);
627            let all_env = env_lines.clone().all(|l| {
628                let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
629                if let Some(p) = l.find('=') {
630                    let k = l[..p].trim();
631                    !k.is_empty()
632                        && !k.contains(' ')
633                        && k.chars()
634                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
635                } else {
636                    false
637                }
638            });
639            if all_env {
640                return Ok((Format::Env, None));
641            }
642        }
643    }
644
645    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
646    if ft.contains(" = ") {
647        return Ok((Format::Toml, None));
648    }
649
650    // YAML: --- 또는 key: value 패턴
651    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
652        return Ok((Format::Yaml, None));
653    }
654
655    // CSV: 콤마가 포함된 구조적 데이터
656    if ft.contains(',') {
657        return Ok((Format::Csv, None));
658    }
659
660    Err(DkitError::FormatDetectionFailed(
661        "could not determine format from content".to_string(),
662    ))
663}
664
/// Returns the default delimiter implied by a file's extension.
///
/// Files with a `.tsv` extension (matched ignoring ASCII case, so `.TSV`
/// also counts) use a tab delimiter. Every other path, including one with no
/// extension or a non-UTF-8 extension, yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension()?.to_str()?;
    if ext.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
673
/// Returns the default delimiter implied by a `--to` format string.
///
/// Only `"tsv"` (compared after lowercasing) selects a tab; any other string yields `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    let is_tsv = format_str.to_lowercase() == "tsv";
    if is_tsv {
        Some('\t')
    } else {
        None
    }
}
681
/// Format-specific options controlling how data is read or written.
///
/// Use [`Default::default()`] to get sensible defaults. Each field is only
/// relevant to the format(s) named in its description.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// CSV delimiter. `None` means no override; the original note gives `,`
    /// as the default, presumably applied by the CSV reader/writer.
    pub delimiter: Option<char>,
    /// CSV mode for input without a header row.
    pub no_header: bool,
    /// Pretty-print the output.
    pub pretty: bool,
    /// Compact output (JSON).
    pub compact: bool,
    /// YAML inline/flow style.
    pub flow_style: bool,
    /// XML root element name. The original note gives `"root"` as the
    /// default, presumably applied by the XML writer when this is `None`.
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output.
    pub styled: bool,
    /// Emit a complete HTML document.
    pub full_html: bool,
    /// JSON indentation setting (a number: count of spaces; `"tab"`: a tab character).
    pub indent: Option<String>,
    /// Sort JSON object keys alphabetically.
    pub sort_keys: bool,
}
708
709impl Default for FormatOptions {
710    fn default() -> Self {
711        Self {
712            delimiter: None,
713            no_header: false,
714            pretty: true,
715            compact: false,
716            flow_style: false,
717            root_element: None,
718            styled: false,
719            full_html: false,
720            indent: None,
721            sort_keys: false,
722        }
723    }
724}
725
/// Trait for reading a data format into a [`Value`].
///
/// Implement this trait to add support for reading a new data format.
///
/// `read_from_reader` takes `impl Read`, which makes it a generic method, so
/// the trait cannot be used as a trait object (`dyn FormatReader`).
#[allow(dead_code)]
pub trait FormatReader {
    /// Parse the given string content and return a [`Value`].
    ///
    /// # Errors
    ///
    /// Failures are reported through the returned [`anyhow::Result`].
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
    ///
    /// # Errors
    ///
    /// Failures are reported through the returned [`anyhow::Result`].
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
737
/// Trait for writing a [`Value`] to a data format.
///
/// Implement this trait to add support for writing a new data format.
///
/// `write_to_writer` takes `impl Write`, which makes it a generic method, so
/// the trait cannot be used as a trait object (`dyn FormatWriter`).
#[allow(dead_code)]
pub trait FormatWriter {
    /// Serialize the given [`Value`] and return the formatted string.
    ///
    /// # Errors
    ///
    /// Failures are reported through the returned [`anyhow::Result`].
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
    ///
    /// # Errors
    ///
    /// Failures are reported through the returned [`anyhow::Result`].
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
749
750#[cfg(test)]
751mod tests {
752    use super::*;
753    use std::path::PathBuf;
754
755    // --- Format::from_str ---
756
757    #[test]
758    fn test_format_from_str() {
759        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
760        assert_eq!(Format::from_str("JSON").unwrap(), Format::Json);
761        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
762        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
763        assert_eq!(Format::from_str("TSV").unwrap(), Format::Csv);
764        assert_eq!(Format::from_str("yaml").unwrap(), Format::Yaml);
765        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
766        assert_eq!(Format::from_str("toml").unwrap(), Format::Toml);
767    }
768
769    #[test]
770    fn test_format_from_str_jsonl() {
771        assert_eq!(Format::from_str("jsonl").unwrap(), Format::Jsonl);
772        assert_eq!(Format::from_str("jsonlines").unwrap(), Format::Jsonl);
773        assert_eq!(Format::from_str("ndjson").unwrap(), Format::Jsonl);
774        assert_eq!(Format::from_str("JSONL").unwrap(), Format::Jsonl);
775    }
776
777    #[test]
778    fn test_format_from_str_xml() {
779        assert_eq!(Format::from_str("xml").unwrap(), Format::Xml);
780    }
781
782    #[test]
783    fn test_format_from_str_msgpack() {
784        assert_eq!(Format::from_str("msgpack").unwrap(), Format::Msgpack);
785        assert_eq!(Format::from_str("messagepack").unwrap(), Format::Msgpack);
786    }
787
788    #[test]
789    fn test_format_from_str_markdown() {
790        assert_eq!(Format::from_str("md").unwrap(), Format::Markdown);
791        assert_eq!(Format::from_str("markdown").unwrap(), Format::Markdown);
792        assert_eq!(Format::from_str("MD").unwrap(), Format::Markdown);
793    }
794
795    #[test]
796    fn test_format_from_str_unknown() {
797        let err = Format::from_str("bin").unwrap_err();
798        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
799    }
800
801    // --- Format::Display ---
802
803    #[test]
804    fn test_format_display() {
805        assert_eq!(Format::Json.to_string(), "JSON");
806        assert_eq!(Format::Csv.to_string(), "CSV");
807        assert_eq!(Format::Yaml.to_string(), "YAML");
808        assert_eq!(Format::Toml.to_string(), "TOML");
809        assert_eq!(Format::Jsonl.to_string(), "JSONL");
810        assert_eq!(Format::Xml.to_string(), "XML");
811        assert_eq!(Format::Msgpack.to_string(), "MessagePack");
812        assert_eq!(Format::Markdown.to_string(), "Markdown");
813        assert_eq!(Format::Table.to_string(), "Table");
814    }
815
816    #[test]
817    fn test_format_from_str_table() {
818        assert_eq!(Format::from_str("table").unwrap(), Format::Table);
819        assert_eq!(Format::from_str("TABLE").unwrap(), Format::Table);
820    }
821
822    #[test]
823    fn test_list_output_formats() {
824        let formats = Format::list_output_formats();
825        assert!(formats.len() >= 10);
826        assert!(formats.iter().any(|(name, _)| *name == "table"));
827        assert!(formats.iter().any(|(name, _)| *name == "json"));
828    }
829
830    // --- detect_format ---
831
832    #[test]
833    fn test_detect_format_json() {
834        assert_eq!(
835            detect_format(&PathBuf::from("data.json")).unwrap(),
836            Format::Json
837        );
838    }
839
840    #[test]
841    fn test_detect_format_csv_tsv() {
842        assert_eq!(
843            detect_format(&PathBuf::from("data.csv")).unwrap(),
844            Format::Csv
845        );
846        assert_eq!(
847            detect_format(&PathBuf::from("data.tsv")).unwrap(),
848            Format::Csv
849        );
850    }
851
852    #[test]
853    fn test_detect_format_yaml() {
854        assert_eq!(
855            detect_format(&PathBuf::from("data.yaml")).unwrap(),
856            Format::Yaml
857        );
858        assert_eq!(
859            detect_format(&PathBuf::from("data.yml")).unwrap(),
860            Format::Yaml
861        );
862    }
863
864    #[test]
865    fn test_detect_format_toml() {
866        assert_eq!(
867            detect_format(&PathBuf::from("config.toml")).unwrap(),
868            Format::Toml
869        );
870    }
871
872    #[test]
873    fn test_detect_format_jsonl() {
874        assert_eq!(
875            detect_format(&PathBuf::from("data.jsonl")).unwrap(),
876            Format::Jsonl
877        );
878        assert_eq!(
879            detect_format(&PathBuf::from("data.ndjson")).unwrap(),
880            Format::Jsonl
881        );
882    }
883
884    #[test]
885    fn test_detect_format_xml() {
886        assert_eq!(
887            detect_format(&PathBuf::from("data.xml")).unwrap(),
888            Format::Xml
889        );
890    }
891
892    #[test]
893    fn test_detect_format_msgpack() {
894        assert_eq!(
895            detect_format(&PathBuf::from("data.msgpack")).unwrap(),
896            Format::Msgpack
897        );
898    }
899
900    #[test]
901    fn test_detect_format_markdown() {
902        assert_eq!(
903            detect_format(&PathBuf::from("output.md")).unwrap(),
904            Format::Markdown
905        );
906    }
907
908    #[test]
909    fn test_detect_format_unknown_ext() {
910        let err = detect_format(&PathBuf::from("data.bin")).unwrap_err();
911        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
912    }
913
914    #[test]
915    fn test_detect_format_no_extension() {
916        let err = detect_format(&PathBuf::from("Makefile")).unwrap_err();
917        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
918    }
919
    // --- default_delimiter ---
923
924    #[test]
925    fn test_default_delimiter_tsv() {
926        assert_eq!(default_delimiter(&PathBuf::from("data.tsv")), Some('\t'));
927    }
928
929    #[test]
930    fn test_default_delimiter_csv() {
931        assert_eq!(default_delimiter(&PathBuf::from("data.csv")), None);
932    }
933
934    #[test]
935    fn test_default_delimiter_json() {
936        assert_eq!(default_delimiter(&PathBuf::from("data.json")), None);
937    }
938
939    #[test]
940    fn test_default_delimiter_for_format_tsv() {
941        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
942        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
943    }
944
945    #[test]
946    fn test_default_delimiter_for_format_csv() {
947        assert_eq!(default_delimiter_for_format("csv"), None);
948    }
949
950    // --- FormatOptions ---
951
952    #[test]
953    fn test_format_options_default() {
954        let opts = FormatOptions::default();
955        assert_eq!(opts.delimiter, None);
956        assert!(!opts.no_header);
957        assert!(opts.pretty);
958        assert!(!opts.compact);
959        assert!(!opts.flow_style);
960        assert_eq!(opts.root_element, None);
961    }
962
963    // --- detect_format_from_content ---
964
965    #[test]
966    fn test_sniff_xml_declaration() {
967        let (fmt, delim) = detect_format_from_content("<?xml version=\"1.0\"?>\n<root/>").unwrap();
968        assert_eq!(fmt, Format::Xml);
969        assert_eq!(delim, None);
970    }
971
972    #[test]
973    fn test_sniff_xml_tag() {
974        let (fmt, _) = detect_format_from_content("<root><item>hello</item></root>").unwrap();
975        assert_eq!(fmt, Format::Xml);
976    }
977
978    #[test]
979    fn test_sniff_json_object() {
980        let (fmt, _) = detect_format_from_content("{\"name\": \"Alice\"}").unwrap();
981        assert_eq!(fmt, Format::Json);
982    }
983
984    #[test]
985    fn test_sniff_json_array() {
986        let (fmt, _) = detect_format_from_content("[1, 2, 3]").unwrap();
987        assert_eq!(fmt, Format::Json);
988    }
989
990    #[test]
991    fn test_sniff_jsonl() {
992        let content = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
993        let (fmt, _) = detect_format_from_content(content).unwrap();
994        assert_eq!(fmt, Format::Jsonl);
995    }
996
997    #[test]
998    fn test_sniff_tsv() {
999        let content = "name\tage\tcity\nAlice\t30\tSeoul\n";
1000        let (fmt, delim) = detect_format_from_content(content).unwrap();
1001        assert_eq!(fmt, Format::Csv);
1002        assert_eq!(delim, Some('\t'));
1003    }
1004
1005    #[test]
1006    fn test_sniff_toml_section() {
1007        let content = "[database]\nhost = \"localhost\"\nport = 5432\n";
1008        let (fmt, _) = detect_format_from_content(content).unwrap();
1009        assert_eq!(fmt, Format::Toml);
1010    }
1011
1012    #[test]
1013    fn test_sniff_toml_key_value() {
1014        let content = "title = \"My App\"\nversion = \"1.0\"\n";
1015        let (fmt, _) = detect_format_from_content(content).unwrap();
1016        assert_eq!(fmt, Format::Toml);
1017    }
1018
1019    #[test]
1020    fn test_sniff_yaml_document() {
1021        let content = "---\nname: Alice\nage: 30\n";
1022        let (fmt, _) = detect_format_from_content(content).unwrap();
1023        assert_eq!(fmt, Format::Yaml);
1024    }
1025
1026    #[test]
1027    fn test_sniff_yaml_key_value() {
1028        let content = "name: Alice\nage: 30\n";
1029        let (fmt, _) = detect_format_from_content(content).unwrap();
1030        assert_eq!(fmt, Format::Yaml);
1031    }
1032
1033    #[test]
1034    fn test_sniff_csv() {
1035        let content = "name,age,city\nAlice,30,Seoul\n";
1036        let (fmt, delim) = detect_format_from_content(content).unwrap();
1037        assert_eq!(fmt, Format::Csv);
1038        assert_eq!(delim, None);
1039    }
1040
1041    #[test]
1042    fn test_sniff_empty_content() {
1043        let err = detect_format_from_content("").unwrap_err();
1044        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1045    }
1046
1047    #[test]
1048    fn test_sniff_whitespace_only() {
1049        let err = detect_format_from_content("   \n  \n").unwrap_err();
1050        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1051    }
1052}