Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// .env file reader and writer.
4pub mod env;
5/// HTML table writer.
6pub mod html;
7/// INI/CFG configuration file reader and writer.
8pub mod ini;
9/// JSON reader, writer, and value conversion utilities.
10pub mod json;
11/// JSON Lines (NDJSON) reader and writer.
12pub mod jsonl;
13/// Log file reader (Apache, nginx, syslog, custom patterns).
14pub mod log;
15/// Markdown table writer.
16pub mod markdown;
17/// Java `.properties` file reader and writer.
18pub mod properties;
19/// TOML reader and writer.
20pub mod toml;
21/// YAML reader and writer.
22pub mod yaml;
23
24// --- Feature-gated format modules ---
25
26/// MessagePack binary reader and writer.
27#[cfg(feature = "msgpack")]
28pub mod msgpack;
29#[cfg(not(feature = "msgpack"))]
30pub mod msgpack {
31    //! Stub module — MessagePack feature not enabled.
32    use super::{FormatReader, FormatWriter};
33    use crate::value::Value;
34    use std::io::{Read, Write};
35
36    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
37
38    pub struct MsgpackReader;
39    impl MsgpackReader {
40        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
41            anyhow::bail!(MSG)
42        }
43    }
44    impl FormatReader for MsgpackReader {
45        fn read(&self, _: &str) -> anyhow::Result<Value> {
46            anyhow::bail!(MSG)
47        }
48        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
49            anyhow::bail!(MSG)
50        }
51    }
52    pub struct MsgpackWriter;
53    impl MsgpackWriter {
54        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
55            anyhow::bail!(MSG)
56        }
57    }
58    impl FormatWriter for MsgpackWriter {
59        fn write(&self, _: &Value) -> anyhow::Result<String> {
60            anyhow::bail!(MSG)
61        }
62        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
63            anyhow::bail!(MSG)
64        }
65    }
66}
67
68/// Apache Parquet columnar format reader and writer.
69#[cfg(feature = "parquet")]
70pub mod parquet;
71#[cfg(not(feature = "parquet"))]
72pub mod parquet {
73    //! Stub module — Parquet feature not enabled.
74    use crate::value::Value;
75
76    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
77
78    #[derive(Debug, Clone, Default)]
79    pub struct ParquetOptions {
80        pub row_group: Option<usize>,
81    }
82    pub struct ParquetReader {
83        _options: ParquetOptions,
84    }
85    impl ParquetReader {
86        pub fn new(options: ParquetOptions) -> Self {
87            Self { _options: options }
88        }
89        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
90            anyhow::bail!(MSG)
91        }
92        #[allow(dead_code)]
93        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
94            anyhow::bail!(MSG)
95        }
96    }
97    #[allow(dead_code)]
98    pub struct ParquetMetadata {
99        pub num_rows: usize,
100        pub num_row_groups: usize,
101        pub columns: Vec<String>,
102        pub column_types: Vec<String>,
103    }
104    #[derive(Debug, Clone, Default)]
105    pub enum ParquetCompression {
106        #[default]
107        None,
108        Snappy,
109        Gzip,
110        Zstd,
111    }
112    impl std::str::FromStr for ParquetCompression {
113        type Err = anyhow::Error;
114        fn from_str(s: &str) -> anyhow::Result<Self> {
115            match s.to_lowercase().as_str() {
116                "none" | "uncompressed" => Ok(Self::None),
117                "snappy" => Ok(Self::Snappy),
118                "gzip" => Ok(Self::Gzip),
119                "zstd" => Ok(Self::Zstd),
120                _ => anyhow::bail!(
121                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
122                    s
123                ),
124            }
125        }
126    }
127    #[derive(Debug, Clone, Default)]
128    pub struct ParquetWriteOptions {
129        pub compression: ParquetCompression,
130        pub row_group_size: Option<usize>,
131    }
132    pub struct ParquetWriter {
133        _options: ParquetWriteOptions,
134    }
135    impl ParquetWriter {
136        pub fn new(options: ParquetWriteOptions) -> Self {
137            Self { _options: options }
138        }
139        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
140            anyhow::bail!(MSG)
141        }
142    }
143    /// Stub for arrow_value_to_value when parquet feature is disabled.
144    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
145        Value::Null
146    }
147}
148
149/// SQLite database reader.
150#[cfg(feature = "sqlite")]
151pub mod sqlite;
152#[cfg(not(feature = "sqlite"))]
153pub mod sqlite {
154    //! Stub module — SQLite feature not enabled.
155    use crate::value::Value;
156    use std::path::Path;
157
158    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
159
160    #[derive(Debug, Clone, Default)]
161    pub struct SqliteOptions {
162        pub table: Option<String>,
163        pub sql: Option<String>,
164    }
165    pub struct SqliteReader {
166        _options: SqliteOptions,
167    }
168    impl SqliteReader {
169        pub fn new(options: SqliteOptions) -> Self {
170            Self { _options: options }
171        }
172        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
173            anyhow::bail!(MSG)
174        }
175        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
176            anyhow::bail!(MSG)
177        }
178    }
179}
180
181/// Excel (XLSX) reader.
182#[cfg(feature = "excel")]
183pub mod xlsx;
184#[cfg(not(feature = "excel"))]
185pub mod xlsx {
186    //! Stub module — Excel feature not enabled.
187    use crate::value::Value;
188
189    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
190
191    #[derive(Debug, Clone, Default)]
192    pub struct XlsxOptions {
193        pub sheet: Option<String>,
194        pub header_row: usize,
195    }
196    pub struct XlsxReader {
197        _options: XlsxOptions,
198    }
199    impl XlsxReader {
200        pub fn new(options: XlsxOptions) -> Self {
201            Self { _options: options }
202        }
203        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
204            anyhow::bail!(MSG)
205        }
206        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
207            anyhow::bail!(MSG)
208        }
209    }
210}
211
212/// HCL (HashiCorp Configuration Language) reader and writer.
213#[cfg(feature = "hcl")]
214pub mod hcl;
215#[cfg(not(feature = "hcl"))]
216pub mod hcl {
217    //! Stub module — HCL feature not enabled.
218    use super::{FormatReader, FormatWriter};
219    use crate::value::Value;
220    use std::io::{Read, Write};
221
222    const MSG: &str = "HCL support requires the 'hcl' feature.\n  Install with: cargo install dkit --features hcl";
223
224    pub struct HclReader;
225    impl FormatReader for HclReader {
226        fn read(&self, _: &str) -> anyhow::Result<Value> {
227            anyhow::bail!(MSG)
228        }
229        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
230            anyhow::bail!(MSG)
231        }
232    }
233    pub struct HclWriter;
234    impl FormatWriter for HclWriter {
235        fn write(&self, _: &Value) -> anyhow::Result<String> {
236            anyhow::bail!(MSG)
237        }
238        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
239            anyhow::bail!(MSG)
240        }
241    }
242}
243
244/// macOS Property List (plist) reader and writer.
245#[cfg(feature = "plist")]
246pub mod plist;
247#[cfg(not(feature = "plist"))]
248pub mod plist {
249    //! Stub module — plist feature not enabled.
250    use super::{FormatReader, FormatWriter};
251    use crate::value::Value;
252    use std::io::{Read, Write};
253
254    const MSG: &str = "Plist support requires the 'plist' feature.\n  Install with: cargo install dkit --features plist";
255
256    pub struct PlistReader;
257    impl FormatReader for PlistReader {
258        fn read(&self, _: &str) -> anyhow::Result<Value> {
259            anyhow::bail!(MSG)
260        }
261        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
262            anyhow::bail!(MSG)
263        }
264    }
265    pub struct PlistWriter;
266    impl FormatWriter for PlistWriter {
267        fn write(&self, _: &Value) -> anyhow::Result<String> {
268            anyhow::bail!(MSG)
269        }
270        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
271            anyhow::bail!(MSG)
272        }
273    }
274}
275
276/// XML reader and writer.
277#[cfg(feature = "xml")]
278pub mod xml;
279#[cfg(not(feature = "xml"))]
280pub mod xml {
281    //! Stub module — XML feature not enabled.
282    use super::{FormatReader, FormatWriter};
283    use crate::value::Value;
284    use std::io::{Read, Write};
285
286    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
287
288    #[derive(Default)]
289    pub struct XmlReader {
290        _private: (),
291    }
292    impl XmlReader {
293        #[allow(dead_code)]
294        pub fn new(_strip_namespaces: bool) -> Self {
295            Self { _private: () }
296        }
297    }
298    impl FormatReader for XmlReader {
299        fn read(&self, _: &str) -> anyhow::Result<Value> {
300            anyhow::bail!(MSG)
301        }
302        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
303            anyhow::bail!(MSG)
304        }
305    }
306    pub struct XmlWriter {
307        _private: (),
308    }
309    impl XmlWriter {
310        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
311            Self { _private: () }
312        }
313    }
314    impl FormatWriter for XmlWriter {
315        fn write(&self, _: &Value) -> anyhow::Result<String> {
316            anyhow::bail!(MSG)
317        }
318        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
319            anyhow::bail!(MSG)
320        }
321    }
322}
323
324use std::io::{Read, Write};
325use std::path::Path;
326
327use crate::error::DkitError;
328use crate::value::Value;
329
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The enum is a plain, field-less tag: it is `Copy`, totally comparable
/// (`Eq`) and hashable (`Hash`), so it can be used directly as a key in
/// `HashMap`/`HashSet` or in `match` guards.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel spreadsheet (`*.xlsx`, read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`)
    Parquet,
    /// Markdown table (write-only)
    Markdown,
    /// HTML table (write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
    /// .env file format (`*.env`, `.env.*`)
    Env,
    /// INI/CFG configuration file format (`*.ini`, `*.cfg`)
    Ini,
    /// Java `.properties` file format (`*.properties`)
    Properties,
    /// HCL (HashiCorp Configuration Language) (`*.hcl`, `*.tf`, `*.tfvars`)
    Hcl,
    /// macOS Property List (`*.plist`)
    Plist,
}
374
375impl Format {
376    #[allow(clippy::should_implement_trait)]
377    pub fn from_str(s: &str) -> Result<Self, DkitError> {
378        match s.to_lowercase().as_str() {
379            "json" => Ok(Format::Json),
380            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
381            "csv" | "tsv" => Ok(Format::Csv),
382            "yaml" | "yml" => Ok(Format::Yaml),
383            "toml" => Ok(Format::Toml),
384            "xml" => Ok(Format::Xml),
385            "msgpack" | "messagepack" => Ok(Format::Msgpack),
386            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
387            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
388            "parquet" | "pq" => Ok(Format::Parquet),
389            "md" | "markdown" => Ok(Format::Markdown),
390            "html" => Ok(Format::Html),
391            "table" => Ok(Format::Table),
392            "env" | "dotenv" => Ok(Format::Env),
393            "ini" | "cfg" | "conf" | "config" => Ok(Format::Ini),
394            "properties" => Ok(Format::Properties),
395            "hcl" | "tf" | "tfvars" => Ok(Format::Hcl),
396            "plist" => Ok(Format::Plist),
397            _ => Err(DkitError::UnknownFormat(s.to_string())),
398        }
399    }
400
401    /// 사용 가능한 출력 포맷 목록을 반환한다
402    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
403        let mut formats = vec![
404            ("json", "JSON format"),
405            ("csv", "Comma-separated values"),
406            ("tsv", "Tab-separated values (CSV variant)"),
407            ("yaml", "YAML format"),
408            ("toml", "TOML format"),
409            ("jsonl", "JSON Lines (one JSON object per line)"),
410        ];
411
412        if cfg!(feature = "xml") {
413            formats.push(("xml", "XML format"));
414        } else {
415            formats.push(("xml", "XML format (requires --features xml)"));
416        }
417        if cfg!(feature = "msgpack") {
418            formats.push(("msgpack", "MessagePack binary format"));
419        } else {
420            formats.push((
421                "msgpack",
422                "MessagePack binary format (requires --features msgpack)",
423            ));
424        }
425        if cfg!(feature = "excel") {
426            formats.push(("xlsx", "Excel spreadsheet (input only)"));
427        } else {
428            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
429        }
430        if cfg!(feature = "sqlite") {
431            formats.push(("sqlite", "SQLite database (input only)"));
432        } else {
433            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
434        }
435        if cfg!(feature = "parquet") {
436            formats.push(("parquet", "Apache Parquet columnar format"));
437        } else {
438            formats.push((
439                "parquet",
440                "Apache Parquet columnar format (requires --features parquet)",
441            ));
442        }
443
444        if cfg!(feature = "hcl") {
445            formats.push(("hcl", "HCL (HashiCorp Configuration Language)"));
446        } else {
447            formats.push((
448                "hcl",
449                "HCL (HashiCorp Configuration Language) (requires --features hcl)",
450            ));
451        }
452
453        if cfg!(feature = "plist") {
454            formats.push(("plist", "macOS Property List format"));
455        } else {
456            formats.push((
457                "plist",
458                "macOS Property List format (requires --features plist)",
459            ));
460        }
461
462        formats.push(("env", "Environment variables (.env) format"));
463        formats.push(("ini", "INI/CFG configuration file format"));
464        formats.push(("properties", "Java .properties file format"));
465        formats.push(("md", "Markdown table"));
466        formats.push(("html", "HTML table"));
467        formats.push(("table", "Terminal table (default for view)"));
468
469        formats
470    }
471}
472
473impl std::fmt::Display for Format {
474    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
475        match self {
476            Format::Json => write!(f, "JSON"),
477            Format::Jsonl => write!(f, "JSONL"),
478            Format::Csv => write!(f, "CSV"),
479            Format::Yaml => write!(f, "YAML"),
480            Format::Toml => write!(f, "TOML"),
481            Format::Xml => write!(f, "XML"),
482            Format::Msgpack => write!(f, "MessagePack"),
483            Format::Xlsx => write!(f, "Excel"),
484            Format::Sqlite => write!(f, "SQLite"),
485            Format::Parquet => write!(f, "Parquet"),
486            Format::Markdown => write!(f, "Markdown"),
487            Format::Html => write!(f, "HTML"),
488            Format::Table => write!(f, "Table"),
489            Format::Env => write!(f, "ENV"),
490            Format::Ini => write!(f, "INI"),
491            Format::Properties => write!(f, "Properties"),
492            Format::Hcl => write!(f, "HCL"),
493            Format::Plist => write!(f, "Plist"),
494        }
495    }
496}
497
498/// 파일 확장자로 포맷을 자동 감지
499pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
500    // .env 파일 감지: .env, .env.local, .env.development 등
501    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
502        if name == ".env" || name.starts_with(".env.") {
503            return Ok(Format::Env);
504        }
505    }
506
507    match path.extension().and_then(|e| e.to_str()) {
508        Some("json") => Ok(Format::Json),
509        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
510        Some("csv" | "tsv") => Ok(Format::Csv),
511        Some("yaml" | "yml") => Ok(Format::Yaml),
512        Some("toml") => Ok(Format::Toml),
513        Some("xml") => Ok(Format::Xml),
514        Some("msgpack") => Ok(Format::Msgpack),
515        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
516        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
517        Some("parquet" | "pq") => Ok(Format::Parquet),
518        Some("md") => Ok(Format::Markdown),
519        Some("html") => Ok(Format::Html),
520        Some("env") => Ok(Format::Env),
521        Some("ini" | "cfg") => Ok(Format::Ini),
522        Some("properties") => Ok(Format::Properties),
523        Some("hcl" | "tf" | "tfvars") => Ok(Format::Hcl),
524        Some("plist") => Ok(Format::Plist),
525        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
526        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
527    }
528}
529
530/// 콘텐츠 스니핑으로 포맷을 자동 감지
531///
532/// 감지 우선순위:
533/// 1. `<?xml` → XML
534/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
535/// 3. `{` 또는 `[` 시작 → JSON
536/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
537/// 5. TOML 패턴 (키 = 값, [섹션])
538/// 6. YAML 패턴 (키: 값, ---)
539pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
540    let trimmed = content.trim_start();
541
542    if trimmed.is_empty() {
543        return Err(DkitError::FormatDetectionFailed(
544            "input is empty".to_string(),
545        ));
546    }
547
548    // Plist: <?xml followed by <!DOCTYPE plist or <plist
549    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
550        if trimmed.contains("<!DOCTYPE plist") || trimmed.contains("<plist") {
551            return Ok((Format::Plist, None));
552        }
553        return Ok((Format::Xml, None));
554    }
555
556    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
557    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
558    if let Some(first_line) = lines.next() {
559        if let Some(second_line) = lines.next() {
560            let first_trimmed = first_line.trim();
561            let second_trimmed = second_line.trim();
562            if first_trimmed.starts_with('{')
563                && first_trimmed.ends_with('}')
564                && second_trimmed.starts_with('{')
565                && second_trimmed.ends_with('}')
566            {
567                return Ok((Format::Jsonl, None));
568            }
569        }
570    }
571
572    // JSON: { 로 시작 (단일 객체)
573    if trimmed.starts_with('{') {
574        return Ok((Format::Json, None));
575    }
576
577    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
578    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
579    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
580    if trimmed.starts_with('[') {
581        let first_line = trimmed.lines().next().unwrap_or("").trim();
582        // TOML 섹션 헤더: [section] 또는 [[array]]
583        let is_toml_section = first_line.starts_with("[[")
584            || (first_line.starts_with('[')
585                && first_line.ends_with(']')
586                && !first_line.contains(',')
587                && first_line[1..first_line.len() - 1].chars().all(|c| {
588                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
589                }));
590        if is_toml_section {
591            return Ok((Format::Toml, None));
592        }
593        return Ok((Format::Json, None));
594    }
595
596    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
597    if trimmed.starts_with('<') {
598        return Ok((Format::Xml, None));
599    }
600
601    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
602    if let Some(first_line) = trimmed.lines().next() {
603        if first_line.contains('\t') {
604            return Ok((Format::Csv, Some('\t')));
605        }
606    }
607
608    // ENV: KEY=VALUE 패턴 (대문자 키, = 주변에 공백 없음)
609    // TOML과 구별: TOML은 " = " (공백 포함), ENV는 "KEY=value" (공백 없음, 대문자)
610    let first_line = trimmed.lines().next().unwrap_or("");
611    let ft = first_line.trim();
612    let env_line = ft.strip_prefix("export ").unwrap_or(ft);
613    if let Some(eq_pos) = env_line.find('=') {
614        let key_part = env_line[..eq_pos].trim();
615        if !key_part.is_empty()
616            && !key_part.contains(' ')
617            && key_part
618                .chars()
619                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
620        {
621            // 여러 줄이 모두 ENV 패턴인지 확인
622            let env_lines = trimmed
623                .lines()
624                .filter(|l| {
625                    let t = l.trim();
626                    !t.is_empty() && !t.starts_with('#')
627                })
628                .take(5);
629            let all_env = env_lines.clone().all(|l| {
630                let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
631                if let Some(p) = l.find('=') {
632                    let k = l[..p].trim();
633                    !k.is_empty()
634                        && !k.contains(' ')
635                        && k.chars()
636                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
637                } else {
638                    false
639                }
640            });
641            if all_env {
642                return Ok((Format::Env, None));
643            }
644        }
645    }
646
647    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
648    if ft.contains(" = ") {
649        return Ok((Format::Toml, None));
650    }
651
652    // YAML: --- 또는 key: value 패턴
653    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
654        return Ok((Format::Yaml, None));
655    }
656
657    // CSV: 콤마가 포함된 구조적 데이터
658    if ft.contains(',') {
659        return Ok((Format::Csv, None));
660    }
661
662    Err(DkitError::FormatDetectionFailed(
663        "could not determine format from content".to_string(),
664    ))
665}
666
/// Returns the default delimiter implied by the file extension.
///
/// Files with a `tsv` extension (compared without regard to ASCII letter
/// case, so `.TSV` counts too) use a tab; every other path yields `None`.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension().and_then(|e| e.to_str())?;
    if ext.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
675
/// Returns the default delimiter implied by a `--to` format string.
///
/// Only `tsv` (in any letter case) has one: a tab. Everything else yields `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    if format_str.to_lowercase() == "tsv" {
        Some('\t')
    } else {
        None
    }
}
683
/// Format-specific options controlling how data is read or written.
///
/// Use [`Default::default()`] to get sensible defaults: pretty-printing is
/// enabled, every other switch is off and every optional setting is unset.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// CSV delimiter (default: ',')
    pub delimiter: Option<char>,
    /// CSV no-header mode
    pub no_header: bool,
    /// Pretty-print output
    pub pretty: bool,
    /// Compact output (JSON)
    pub compact: bool,
    /// YAML inline/flow style
    pub flow_style: bool,
    /// XML root element name (default: "root")
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output
    pub styled: bool,
    /// Emit a complete HTML document
    pub full_html: bool,
    /// JSON indentation setting (a number: count of spaces, "tab": a tab character)
    pub indent: Option<String>,
    /// Sort JSON object keys alphabetically
    pub sort_keys: bool,
}
710
711impl Default for FormatOptions {
712    fn default() -> Self {
713        Self {
714            delimiter: None,
715            no_header: false,
716            pretty: true,
717            compact: false,
718            flow_style: false,
719            root_element: None,
720            styled: false,
721            full_html: false,
722            indent: None,
723            sort_keys: false,
724        }
725    }
726}
727
/// Trait for reading a data format into a [`Value`].
///
/// Implement this trait to add support for reading a new data format.
///
/// Both methods report failures through [`anyhow::Result`]. The placeholder
/// readers defined in this file for disabled optional features implement the
/// trait by always returning a "feature not enabled" error.
#[allow(dead_code)]
pub trait FormatReader {
    /// Parse the given string content and return a [`Value`].
    ///
    /// `input` is the complete document text.
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
    ///
    /// The reader is taken by value (`impl Read`), so this method is generic
    /// over the concrete reader type.
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
739
/// Trait for writing a [`Value`] to a data format.
///
/// Implement this trait to add support for writing a new data format.
///
/// Both methods report failures through [`anyhow::Result`]. The placeholder
/// writers defined in this file for disabled optional features implement the
/// trait by always returning a "feature not enabled" error.
#[allow(dead_code)]
pub trait FormatWriter {
    /// Serialize the given [`Value`] and return the formatted string.
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
    ///
    /// The writer is taken by value (`impl Write`), so this method is generic
    /// over the concrete writer type.
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
751
752#[cfg(test)]
753mod tests {
754    use super::*;
755    use std::path::PathBuf;
756
757    // --- Format::from_str ---
758
759    #[test]
760    fn test_format_from_str() {
761        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
762        assert_eq!(Format::from_str("JSON").unwrap(), Format::Json);
763        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
764        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
765        assert_eq!(Format::from_str("TSV").unwrap(), Format::Csv);
766        assert_eq!(Format::from_str("yaml").unwrap(), Format::Yaml);
767        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
768        assert_eq!(Format::from_str("toml").unwrap(), Format::Toml);
769    }
770
771    #[test]
772    fn test_format_from_str_jsonl() {
773        assert_eq!(Format::from_str("jsonl").unwrap(), Format::Jsonl);
774        assert_eq!(Format::from_str("jsonlines").unwrap(), Format::Jsonl);
775        assert_eq!(Format::from_str("ndjson").unwrap(), Format::Jsonl);
776        assert_eq!(Format::from_str("JSONL").unwrap(), Format::Jsonl);
777    }
778
779    #[test]
780    fn test_format_from_str_xml() {
781        assert_eq!(Format::from_str("xml").unwrap(), Format::Xml);
782    }
783
784    #[test]
785    fn test_format_from_str_msgpack() {
786        assert_eq!(Format::from_str("msgpack").unwrap(), Format::Msgpack);
787        assert_eq!(Format::from_str("messagepack").unwrap(), Format::Msgpack);
788    }
789
790    #[test]
791    fn test_format_from_str_markdown() {
792        assert_eq!(Format::from_str("md").unwrap(), Format::Markdown);
793        assert_eq!(Format::from_str("markdown").unwrap(), Format::Markdown);
794        assert_eq!(Format::from_str("MD").unwrap(), Format::Markdown);
795    }
796
797    #[test]
798    fn test_format_from_str_unknown() {
799        let err = Format::from_str("bin").unwrap_err();
800        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
801    }
802
803    // --- Format::Display ---
804
805    #[test]
806    fn test_format_display() {
807        assert_eq!(Format::Json.to_string(), "JSON");
808        assert_eq!(Format::Csv.to_string(), "CSV");
809        assert_eq!(Format::Yaml.to_string(), "YAML");
810        assert_eq!(Format::Toml.to_string(), "TOML");
811        assert_eq!(Format::Jsonl.to_string(), "JSONL");
812        assert_eq!(Format::Xml.to_string(), "XML");
813        assert_eq!(Format::Msgpack.to_string(), "MessagePack");
814        assert_eq!(Format::Markdown.to_string(), "Markdown");
815        assert_eq!(Format::Table.to_string(), "Table");
816    }
817
818    #[test]
819    fn test_format_from_str_table() {
820        assert_eq!(Format::from_str("table").unwrap(), Format::Table);
821        assert_eq!(Format::from_str("TABLE").unwrap(), Format::Table);
822    }
823
824    #[test]
825    fn test_list_output_formats() {
826        let formats = Format::list_output_formats();
827        assert!(formats.len() >= 10);
828        assert!(formats.iter().any(|(name, _)| *name == "table"));
829        assert!(formats.iter().any(|(name, _)| *name == "json"));
830    }
831
832    // --- detect_format ---
833
834    #[test]
835    fn test_detect_format_json() {
836        assert_eq!(
837            detect_format(&PathBuf::from("data.json")).unwrap(),
838            Format::Json
839        );
840    }
841
842    #[test]
843    fn test_detect_format_csv_tsv() {
844        assert_eq!(
845            detect_format(&PathBuf::from("data.csv")).unwrap(),
846            Format::Csv
847        );
848        assert_eq!(
849            detect_format(&PathBuf::from("data.tsv")).unwrap(),
850            Format::Csv
851        );
852    }
853
854    #[test]
855    fn test_detect_format_yaml() {
856        assert_eq!(
857            detect_format(&PathBuf::from("data.yaml")).unwrap(),
858            Format::Yaml
859        );
860        assert_eq!(
861            detect_format(&PathBuf::from("data.yml")).unwrap(),
862            Format::Yaml
863        );
864    }
865
866    #[test]
867    fn test_detect_format_toml() {
868        assert_eq!(
869            detect_format(&PathBuf::from("config.toml")).unwrap(),
870            Format::Toml
871        );
872    }
873
874    #[test]
875    fn test_detect_format_jsonl() {
876        assert_eq!(
877            detect_format(&PathBuf::from("data.jsonl")).unwrap(),
878            Format::Jsonl
879        );
880        assert_eq!(
881            detect_format(&PathBuf::from("data.ndjson")).unwrap(),
882            Format::Jsonl
883        );
884    }
885
886    #[test]
887    fn test_detect_format_xml() {
888        assert_eq!(
889            detect_format(&PathBuf::from("data.xml")).unwrap(),
890            Format::Xml
891        );
892    }
893
894    #[test]
895    fn test_detect_format_msgpack() {
896        assert_eq!(
897            detect_format(&PathBuf::from("data.msgpack")).unwrap(),
898            Format::Msgpack
899        );
900    }
901
902    #[test]
903    fn test_detect_format_markdown() {
904        assert_eq!(
905            detect_format(&PathBuf::from("output.md")).unwrap(),
906            Format::Markdown
907        );
908    }
909
910    #[test]
911    fn test_detect_format_unknown_ext() {
912        let err = detect_format(&PathBuf::from("data.bin")).unwrap_err();
913        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
914    }
915
916    #[test]
917    fn test_detect_format_no_extension() {
918        let err = detect_format(&PathBuf::from("Makefile")).unwrap_err();
919        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
920    }
921
    // --- default_delimiter ---
925
926    #[test]
927    fn test_default_delimiter_tsv() {
928        assert_eq!(default_delimiter(&PathBuf::from("data.tsv")), Some('\t'));
929    }
930
931    #[test]
932    fn test_default_delimiter_csv() {
933        assert_eq!(default_delimiter(&PathBuf::from("data.csv")), None);
934    }
935
936    #[test]
937    fn test_default_delimiter_json() {
938        assert_eq!(default_delimiter(&PathBuf::from("data.json")), None);
939    }
940
941    #[test]
942    fn test_default_delimiter_for_format_tsv() {
943        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
944        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
945    }
946
947    #[test]
948    fn test_default_delimiter_for_format_csv() {
949        assert_eq!(default_delimiter_for_format("csv"), None);
950    }
951
952    // --- FormatOptions ---
953
954    #[test]
955    fn test_format_options_default() {
956        let opts = FormatOptions::default();
957        assert_eq!(opts.delimiter, None);
958        assert!(!opts.no_header);
959        assert!(opts.pretty);
960        assert!(!opts.compact);
961        assert!(!opts.flow_style);
962        assert_eq!(opts.root_element, None);
963    }
964
965    // --- detect_format_from_content ---
966
967    #[test]
968    fn test_sniff_xml_declaration() {
969        let (fmt, delim) = detect_format_from_content("<?xml version=\"1.0\"?>\n<root/>").unwrap();
970        assert_eq!(fmt, Format::Xml);
971        assert_eq!(delim, None);
972    }
973
974    #[test]
975    fn test_sniff_xml_tag() {
976        let (fmt, _) = detect_format_from_content("<root><item>hello</item></root>").unwrap();
977        assert_eq!(fmt, Format::Xml);
978    }
979
980    #[test]
981    fn test_sniff_json_object() {
982        let (fmt, _) = detect_format_from_content("{\"name\": \"Alice\"}").unwrap();
983        assert_eq!(fmt, Format::Json);
984    }
985
986    #[test]
987    fn test_sniff_json_array() {
988        let (fmt, _) = detect_format_from_content("[1, 2, 3]").unwrap();
989        assert_eq!(fmt, Format::Json);
990    }
991
992    #[test]
993    fn test_sniff_jsonl() {
994        let content = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
995        let (fmt, _) = detect_format_from_content(content).unwrap();
996        assert_eq!(fmt, Format::Jsonl);
997    }
998
999    #[test]
1000    fn test_sniff_tsv() {
1001        let content = "name\tage\tcity\nAlice\t30\tSeoul\n";
1002        let (fmt, delim) = detect_format_from_content(content).unwrap();
1003        assert_eq!(fmt, Format::Csv);
1004        assert_eq!(delim, Some('\t'));
1005    }
1006
1007    #[test]
1008    fn test_sniff_toml_section() {
1009        let content = "[database]\nhost = \"localhost\"\nport = 5432\n";
1010        let (fmt, _) = detect_format_from_content(content).unwrap();
1011        assert_eq!(fmt, Format::Toml);
1012    }
1013
1014    #[test]
1015    fn test_sniff_toml_key_value() {
1016        let content = "title = \"My App\"\nversion = \"1.0\"\n";
1017        let (fmt, _) = detect_format_from_content(content).unwrap();
1018        assert_eq!(fmt, Format::Toml);
1019    }
1020
1021    #[test]
1022    fn test_sniff_yaml_document() {
1023        let content = "---\nname: Alice\nage: 30\n";
1024        let (fmt, _) = detect_format_from_content(content).unwrap();
1025        assert_eq!(fmt, Format::Yaml);
1026    }
1027
1028    #[test]
1029    fn test_sniff_yaml_key_value() {
1030        let content = "name: Alice\nage: 30\n";
1031        let (fmt, _) = detect_format_from_content(content).unwrap();
1032        assert_eq!(fmt, Format::Yaml);
1033    }
1034
1035    #[test]
1036    fn test_sniff_csv() {
1037        let content = "name,age,city\nAlice,30,Seoul\n";
1038        let (fmt, delim) = detect_format_from_content(content).unwrap();
1039        assert_eq!(fmt, Format::Csv);
1040        assert_eq!(delim, None);
1041    }
1042
1043    #[test]
1044    fn test_sniff_empty_content() {
1045        let err = detect_format_from_content("").unwrap_err();
1046        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1047    }
1048
1049    #[test]
1050    fn test_sniff_whitespace_only() {
1051        let err = detect_format_from_content("   \n  \n").unwrap_err();
1052        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1053    }
1054}