Skip to main content

dkit_core/format/
mod.rs

1/// CSV/TSV reader and writer.
2pub mod csv;
3/// .env file reader and writer.
4pub mod env;
5/// HTML table writer.
6pub mod html;
7/// INI/CFG configuration file reader and writer.
8pub mod ini;
9/// JSON reader, writer, and value conversion utilities.
10pub mod json;
11/// JSON Lines (NDJSON) reader and writer.
12pub mod jsonl;
13/// Log file reader (Apache, nginx, syslog, custom patterns).
14pub mod log;
15/// Markdown table writer.
16pub mod markdown;
17/// Java `.properties` file reader and writer.
18pub mod properties;
19/// TOML reader and writer.
20pub mod toml;
21/// YAML reader and writer.
22pub mod yaml;
23
24// --- Feature-gated format modules ---
25
26/// MessagePack binary reader and writer.
27#[cfg(feature = "msgpack")]
28pub mod msgpack;
29#[cfg(not(feature = "msgpack"))]
30pub mod msgpack {
31    //! Stub module — MessagePack feature not enabled.
32    use super::{FormatReader, FormatWriter};
33    use crate::value::Value;
34    use std::io::{Read, Write};
35
36    const MSG: &str = "MessagePack support requires the 'msgpack' feature.\n  Install with: cargo install dkit --features msgpack";
37
38    pub struct MsgpackReader;
39    impl MsgpackReader {
40        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
41            anyhow::bail!(MSG)
42        }
43    }
44    impl FormatReader for MsgpackReader {
45        fn read(&self, _: &str) -> anyhow::Result<Value> {
46            anyhow::bail!(MSG)
47        }
48        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
49            anyhow::bail!(MSG)
50        }
51    }
52    pub struct MsgpackWriter;
53    impl MsgpackWriter {
54        pub fn write_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
55            anyhow::bail!(MSG)
56        }
57    }
58    impl FormatWriter for MsgpackWriter {
59        fn write(&self, _: &Value) -> anyhow::Result<String> {
60            anyhow::bail!(MSG)
61        }
62        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
63            anyhow::bail!(MSG)
64        }
65    }
66}
67
68/// Apache Parquet columnar format reader and writer.
69#[cfg(feature = "parquet")]
70pub mod parquet;
71#[cfg(not(feature = "parquet"))]
72pub mod parquet {
73    //! Stub module — Parquet feature not enabled.
74    use crate::value::Value;
75
76    const MSG: &str = "Parquet support requires the 'parquet' feature.\n  Install with: cargo install dkit --features parquet";
77
78    #[derive(Debug, Clone, Default)]
79    pub struct ParquetOptions {
80        pub row_group: Option<usize>,
81    }
82    pub struct ParquetReader {
83        _options: ParquetOptions,
84    }
85    impl ParquetReader {
86        pub fn new(options: ParquetOptions) -> Self {
87            Self { _options: options }
88        }
89        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
90            anyhow::bail!(MSG)
91        }
92        #[allow(dead_code)]
93        pub fn read_metadata(_bytes: &[u8]) -> anyhow::Result<ParquetMetadata> {
94            anyhow::bail!(MSG)
95        }
96    }
97    #[allow(dead_code)]
98    pub struct ParquetMetadata {
99        pub num_rows: usize,
100        pub num_row_groups: usize,
101        pub columns: Vec<String>,
102        pub column_types: Vec<String>,
103    }
104    #[derive(Debug, Clone, Default)]
105    pub enum ParquetCompression {
106        #[default]
107        None,
108        Snappy,
109        Gzip,
110        Zstd,
111    }
112    impl std::str::FromStr for ParquetCompression {
113        type Err = anyhow::Error;
114        fn from_str(s: &str) -> anyhow::Result<Self> {
115            match s.to_lowercase().as_str() {
116                "none" | "uncompressed" => Ok(Self::None),
117                "snappy" => Ok(Self::Snappy),
118                "gzip" => Ok(Self::Gzip),
119                "zstd" => Ok(Self::Zstd),
120                _ => anyhow::bail!(
121                    "Unknown Parquet compression '{}'. Valid options: none, snappy, gzip, zstd",
122                    s
123                ),
124            }
125        }
126    }
127    #[derive(Debug, Clone, Default)]
128    pub struct ParquetWriteOptions {
129        pub compression: ParquetCompression,
130        pub row_group_size: Option<usize>,
131    }
132    pub struct ParquetWriter {
133        _options: ParquetWriteOptions,
134    }
135    impl ParquetWriter {
136        pub fn new(options: ParquetWriteOptions) -> Self {
137            Self { _options: options }
138        }
139        pub fn write_to_bytes(&self, _value: &Value) -> anyhow::Result<Vec<u8>> {
140            anyhow::bail!(MSG)
141        }
142    }
143    /// Stub for arrow_value_to_value when parquet feature is disabled.
144    pub fn arrow_value_to_value(_array: &dyn std::any::Any, _idx: usize) -> Value {
145        Value::Null
146    }
147}
148
149/// SQLite database reader.
150#[cfg(feature = "sqlite")]
151pub mod sqlite;
152#[cfg(not(feature = "sqlite"))]
153pub mod sqlite {
154    //! Stub module — SQLite feature not enabled.
155    use crate::value::Value;
156    use std::path::Path;
157
158    const MSG: &str = "SQLite support requires the 'sqlite' feature.\n  Install with: cargo install dkit --features sqlite";
159
160    #[derive(Debug, Clone, Default)]
161    pub struct SqliteOptions {
162        pub table: Option<String>,
163        pub sql: Option<String>,
164    }
165    pub struct SqliteReader {
166        _options: SqliteOptions,
167    }
168    impl SqliteReader {
169        pub fn new(options: SqliteOptions) -> Self {
170            Self { _options: options }
171        }
172        pub fn read_from_path(&self, _path: &Path) -> anyhow::Result<Value> {
173            anyhow::bail!(MSG)
174        }
175        pub fn list_tables(_path: &Path) -> anyhow::Result<Vec<String>> {
176            anyhow::bail!(MSG)
177        }
178    }
179}
180
181/// Excel (XLSX) reader.
182#[cfg(feature = "excel")]
183pub mod xlsx;
184#[cfg(not(feature = "excel"))]
185pub mod xlsx {
186    //! Stub module — Excel feature not enabled.
187    use crate::value::Value;
188
189    const MSG: &str = "Excel support requires the 'excel' feature.\n  Install with: cargo install dkit --features excel";
190
191    #[derive(Debug, Clone, Default)]
192    pub struct XlsxOptions {
193        pub sheet: Option<String>,
194        pub header_row: usize,
195    }
196    pub struct XlsxReader {
197        _options: XlsxOptions,
198    }
199    impl XlsxReader {
200        pub fn new(options: XlsxOptions) -> Self {
201            Self { _options: options }
202        }
203        pub fn read_from_bytes(&self, _bytes: &[u8]) -> anyhow::Result<Value> {
204            anyhow::bail!(MSG)
205        }
206        pub fn list_sheets(_bytes: &[u8]) -> anyhow::Result<Vec<String>> {
207            anyhow::bail!(MSG)
208        }
209    }
210}
211
212/// HCL (HashiCorp Configuration Language) reader and writer.
213#[cfg(feature = "hcl")]
214pub mod hcl;
215#[cfg(not(feature = "hcl"))]
216pub mod hcl {
217    //! Stub module — HCL feature not enabled.
218    use super::{FormatReader, FormatWriter};
219    use crate::value::Value;
220    use std::io::{Read, Write};
221
222    const MSG: &str = "HCL support requires the 'hcl' feature.\n  Install with: cargo install dkit --features hcl";
223
224    pub struct HclReader;
225    impl FormatReader for HclReader {
226        fn read(&self, _: &str) -> anyhow::Result<Value> {
227            anyhow::bail!(MSG)
228        }
229        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
230            anyhow::bail!(MSG)
231        }
232    }
233    pub struct HclWriter;
234    impl FormatWriter for HclWriter {
235        fn write(&self, _: &Value) -> anyhow::Result<String> {
236            anyhow::bail!(MSG)
237        }
238        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
239            anyhow::bail!(MSG)
240        }
241    }
242}
243
244/// macOS Property List (plist) reader and writer.
245#[cfg(feature = "plist")]
246pub mod plist;
247#[cfg(not(feature = "plist"))]
248pub mod plist {
249    //! Stub module — plist feature not enabled.
250    use super::{FormatReader, FormatWriter};
251    use crate::value::Value;
252    use std::io::{Read, Write};
253
254    const MSG: &str = "Plist support requires the 'plist' feature.\n  Install with: cargo install dkit --features plist";
255
256    pub struct PlistReader;
257    impl FormatReader for PlistReader {
258        fn read(&self, _: &str) -> anyhow::Result<Value> {
259            anyhow::bail!(MSG)
260        }
261        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
262            anyhow::bail!(MSG)
263        }
264    }
265    pub struct PlistWriter;
266    impl FormatWriter for PlistWriter {
267        fn write(&self, _: &Value) -> anyhow::Result<String> {
268            anyhow::bail!(MSG)
269        }
270        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
271            anyhow::bail!(MSG)
272        }
273    }
274}
275
276/// Template-based custom text output writer.
277#[cfg(feature = "template")]
278pub mod template;
279#[cfg(not(feature = "template"))]
280pub mod template {
281    //! Stub module — Template feature not enabled.
282    use super::FormatWriter;
283    use crate::value::Value;
284    use std::io::Write;
285
286    const MSG: &str = "Template support requires the 'template' feature.\n  Install with: cargo install dkit --features template";
287
288    pub struct TemplateWriter {
289        _private: (),
290    }
291    impl TemplateWriter {
292        pub fn new(_options: super::FormatOptions) -> Self {
293            Self { _private: () }
294        }
295    }
296    impl FormatWriter for TemplateWriter {
297        fn write(&self, _: &Value) -> anyhow::Result<String> {
298            anyhow::bail!(MSG)
299        }
300        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
301            anyhow::bail!(MSG)
302        }
303    }
304}
305
306/// XML reader and writer.
307#[cfg(feature = "xml")]
308pub mod xml;
309#[cfg(not(feature = "xml"))]
310pub mod xml {
311    //! Stub module — XML feature not enabled.
312    use super::{FormatReader, FormatWriter};
313    use crate::value::Value;
314    use std::io::{Read, Write};
315
316    const MSG: &str = "XML support requires the 'xml' feature.\n  Install with: cargo install dkit --features xml";
317
318    #[derive(Default)]
319    pub struct XmlReader {
320        _private: (),
321    }
322    impl XmlReader {
323        #[allow(dead_code)]
324        pub fn new(_strip_namespaces: bool) -> Self {
325            Self { _private: () }
326        }
327    }
328    impl FormatReader for XmlReader {
329        fn read(&self, _: &str) -> anyhow::Result<Value> {
330            anyhow::bail!(MSG)
331        }
332        fn read_from_reader(&self, _: impl Read) -> anyhow::Result<Value> {
333            anyhow::bail!(MSG)
334        }
335    }
336    pub struct XmlWriter {
337        _private: (),
338    }
339    impl XmlWriter {
340        pub fn new(_pretty: bool, _root_element: Option<String>) -> Self {
341            Self { _private: () }
342        }
343    }
344    impl FormatWriter for XmlWriter {
345        fn write(&self, _: &Value) -> anyhow::Result<String> {
346            anyhow::bail!(MSG)
347        }
348        fn write_to_writer(&self, _: &Value, _: impl Write) -> anyhow::Result<()> {
349            anyhow::bail!(MSG)
350        }
351    }
352}
353
354use std::io::{Read, Write};
355use std::path::Path;
356
357use crate::error::DkitError;
358use crate::value::Value;
359
/// Supported data formats for reading and writing.
///
/// Each variant represents a data serialization format that dkit can
/// convert to or from the unified [`Value`] model.
///
/// The enum is a field-less `Copy` type, so besides `PartialEq` it also
/// derives `Eq` and `Hash`; this allows formats to be used as keys in
/// `HashMap`/`HashSet` and in other contexts that require total equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
    /// JSON (`*.json`)
    Json,
    /// JSON Lines / NDJSON (`*.jsonl`, `*.ndjson`)
    Jsonl,
    /// Comma/Tab-separated values (`*.csv`, `*.tsv`)
    Csv,
    /// YAML (`*.yaml`, `*.yml`)
    Yaml,
    /// TOML (`*.toml`)
    Toml,
    /// XML (`*.xml`)
    Xml,
    /// MessagePack binary format (`*.msgpack`)
    Msgpack,
    /// Excel spreadsheet (`*.xlsx`, read-only)
    Xlsx,
    /// SQLite database (`*.sqlite`, read-only)
    Sqlite,
    /// Apache Parquet columnar format (`*.parquet`)
    Parquet,
    /// Markdown table (write-only)
    Markdown,
    /// HTML table (write-only)
    Html,
    /// Terminal table (write-only, used by `dkit view`)
    Table,
    /// .env file format (`*.env`, `.env.*`)
    Env,
    /// INI/CFG configuration file format (`*.ini`, `*.cfg`)
    Ini,
    /// Java `.properties` file format (`*.properties`)
    Properties,
    /// HCL (HashiCorp Configuration Language) (`*.hcl`, `*.tf`, `*.tfvars`)
    Hcl,
    /// macOS Property List (`*.plist`)
    Plist,
    /// Template-based custom text output (write-only)
    Template,
}
406
407impl Format {
408    #[allow(clippy::should_implement_trait)]
409    pub fn from_str(s: &str) -> Result<Self, DkitError> {
410        match s.to_lowercase().as_str() {
411            "json" => Ok(Format::Json),
412            "jsonl" | "jsonlines" | "ndjson" => Ok(Format::Jsonl),
413            "csv" | "tsv" => Ok(Format::Csv),
414            "yaml" | "yml" => Ok(Format::Yaml),
415            "toml" => Ok(Format::Toml),
416            "xml" => Ok(Format::Xml),
417            "msgpack" | "messagepack" => Ok(Format::Msgpack),
418            "xlsx" | "excel" | "xls" => Ok(Format::Xlsx),
419            "sqlite" | "sqlite3" | "db" => Ok(Format::Sqlite),
420            "parquet" | "pq" => Ok(Format::Parquet),
421            "md" | "markdown" => Ok(Format::Markdown),
422            "html" => Ok(Format::Html),
423            "table" => Ok(Format::Table),
424            "env" | "dotenv" => Ok(Format::Env),
425            "ini" | "cfg" | "conf" | "config" => Ok(Format::Ini),
426            "properties" => Ok(Format::Properties),
427            "hcl" | "tf" | "tfvars" => Ok(Format::Hcl),
428            "plist" => Ok(Format::Plist),
429            "template" | "tpl" => Ok(Format::Template),
430            _ => Err(DkitError::UnknownFormat(s.to_string())),
431        }
432    }
433
434    /// 사용 가능한 출력 포맷 목록을 반환한다
435    pub fn list_output_formats() -> Vec<(&'static str, &'static str)> {
436        let mut formats = vec![
437            ("json", "JSON format"),
438            ("csv", "Comma-separated values"),
439            ("tsv", "Tab-separated values (CSV variant)"),
440            ("yaml", "YAML format"),
441            ("toml", "TOML format"),
442            ("jsonl", "JSON Lines (one JSON object per line)"),
443        ];
444
445        if cfg!(feature = "xml") {
446            formats.push(("xml", "XML format"));
447        } else {
448            formats.push(("xml", "XML format (requires --features xml)"));
449        }
450        if cfg!(feature = "msgpack") {
451            formats.push(("msgpack", "MessagePack binary format"));
452        } else {
453            formats.push((
454                "msgpack",
455                "MessagePack binary format (requires --features msgpack)",
456            ));
457        }
458        if cfg!(feature = "excel") {
459            formats.push(("xlsx", "Excel spreadsheet (input only)"));
460        } else {
461            formats.push(("xlsx", "Excel spreadsheet (requires --features excel)"));
462        }
463        if cfg!(feature = "sqlite") {
464            formats.push(("sqlite", "SQLite database (input only)"));
465        } else {
466            formats.push(("sqlite", "SQLite database (requires --features sqlite)"));
467        }
468        if cfg!(feature = "parquet") {
469            formats.push(("parquet", "Apache Parquet columnar format"));
470        } else {
471            formats.push((
472                "parquet",
473                "Apache Parquet columnar format (requires --features parquet)",
474            ));
475        }
476
477        if cfg!(feature = "hcl") {
478            formats.push(("hcl", "HCL (HashiCorp Configuration Language)"));
479        } else {
480            formats.push((
481                "hcl",
482                "HCL (HashiCorp Configuration Language) (requires --features hcl)",
483            ));
484        }
485
486        if cfg!(feature = "plist") {
487            formats.push(("plist", "macOS Property List format"));
488        } else {
489            formats.push((
490                "plist",
491                "macOS Property List format (requires --features plist)",
492            ));
493        }
494
495        if cfg!(feature = "template") {
496            formats.push(("template", "Custom text output via Tera templates"));
497        } else {
498            formats.push((
499                "template",
500                "Custom text output via Tera templates (requires --features template)",
501            ));
502        }
503
504        formats.push(("env", "Environment variables (.env) format"));
505        formats.push(("ini", "INI/CFG configuration file format"));
506        formats.push(("properties", "Java .properties file format"));
507        formats.push(("md", "Markdown table"));
508        formats.push(("html", "HTML table"));
509        formats.push(("table", "Terminal table (default for view)"));
510
511        formats
512    }
513}
514
515impl std::fmt::Display for Format {
516    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
517        match self {
518            Format::Json => write!(f, "JSON"),
519            Format::Jsonl => write!(f, "JSONL"),
520            Format::Csv => write!(f, "CSV"),
521            Format::Yaml => write!(f, "YAML"),
522            Format::Toml => write!(f, "TOML"),
523            Format::Xml => write!(f, "XML"),
524            Format::Msgpack => write!(f, "MessagePack"),
525            Format::Xlsx => write!(f, "Excel"),
526            Format::Sqlite => write!(f, "SQLite"),
527            Format::Parquet => write!(f, "Parquet"),
528            Format::Markdown => write!(f, "Markdown"),
529            Format::Html => write!(f, "HTML"),
530            Format::Table => write!(f, "Table"),
531            Format::Env => write!(f, "ENV"),
532            Format::Ini => write!(f, "INI"),
533            Format::Properties => write!(f, "Properties"),
534            Format::Hcl => write!(f, "HCL"),
535            Format::Plist => write!(f, "Plist"),
536            Format::Template => write!(f, "Template"),
537        }
538    }
539}
540
541/// 파일 확장자로 포맷을 자동 감지
542pub fn detect_format(path: &Path) -> Result<Format, DkitError> {
543    // .env 파일 감지: .env, .env.local, .env.development 등
544    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
545        if name == ".env" || name.starts_with(".env.") {
546            return Ok(Format::Env);
547        }
548    }
549
550    match path.extension().and_then(|e| e.to_str()) {
551        Some("json") => Ok(Format::Json),
552        Some("jsonl" | "ndjson") => Ok(Format::Jsonl),
553        Some("csv" | "tsv") => Ok(Format::Csv),
554        Some("yaml" | "yml") => Ok(Format::Yaml),
555        Some("toml") => Ok(Format::Toml),
556        Some("xml") => Ok(Format::Xml),
557        Some("msgpack") => Ok(Format::Msgpack),
558        Some("xlsx" | "xls" | "xlsm" | "xlsb" | "ods") => Ok(Format::Xlsx),
559        Some("db" | "sqlite" | "sqlite3") => Ok(Format::Sqlite),
560        Some("parquet" | "pq") => Ok(Format::Parquet),
561        Some("md") => Ok(Format::Markdown),
562        Some("html") => Ok(Format::Html),
563        Some("env") => Ok(Format::Env),
564        Some("ini" | "cfg") => Ok(Format::Ini),
565        Some("properties") => Ok(Format::Properties),
566        Some("hcl" | "tf" | "tfvars") => Ok(Format::Hcl),
567        Some("plist") => Ok(Format::Plist),
568        Some(ext) => Err(DkitError::UnknownFormat(ext.to_string())),
569        None => Err(DkitError::UnknownFormat("(no extension)".to_string())),
570    }
571}
572
573/// 콘텐츠 스니핑으로 포맷을 자동 감지
574///
575/// 감지 우선순위:
576/// 1. `<?xml` → XML
577/// 2. 첫 줄이 JSON 객체 + 둘째 줄도 JSON 객체 → JSONL
578/// 3. `{` 또는 `[` 시작 → JSON
579/// 4. 탭 구분자가 포함된 구조적 데이터 → CSV (TSV)
580/// 5. TOML 패턴 (키 = 값, [섹션])
581/// 6. YAML 패턴 (키: 값, ---)
582pub fn detect_format_from_content(content: &str) -> Result<(Format, Option<char>), DkitError> {
583    let trimmed = content.trim_start();
584
585    if trimmed.is_empty() {
586        return Err(DkitError::FormatDetectionFailed(
587            "input is empty".to_string(),
588        ));
589    }
590
591    // Plist: <?xml followed by <!DOCTYPE plist or <plist
592    if trimmed.starts_with("<?xml") || trimmed.starts_with("<!DOCTYPE") {
593        if trimmed.contains("<!DOCTYPE plist") || trimmed.contains("<plist") {
594            return Ok((Format::Plist, None));
595        }
596        return Ok((Format::Xml, None));
597    }
598
599    // JSONL: 첫째 줄과 둘째 줄 모두 JSON 객체
600    let mut lines = trimmed.lines().filter(|l| !l.trim().is_empty());
601    if let Some(first_line) = lines.next() {
602        if let Some(second_line) = lines.next() {
603            let first_trimmed = first_line.trim();
604            let second_trimmed = second_line.trim();
605            if first_trimmed.starts_with('{')
606                && first_trimmed.ends_with('}')
607                && second_trimmed.starts_with('{')
608                && second_trimmed.ends_with('}')
609            {
610                return Ok((Format::Jsonl, None));
611            }
612        }
613    }
614
615    // JSON: { 로 시작 (단일 객체)
616    if trimmed.starts_with('{') {
617        return Ok((Format::Json, None));
618    }
619
620    // [ 로 시작: JSON 배열 vs TOML 섹션 헤더 구분
621    // TOML 섹션: [word] 형태 (내부가 알파벳/밑줄/점/하이픈)
622    // JSON 배열: [값, ...] 또는 여러 줄에 걸친 배열
623    if trimmed.starts_with('[') {
624        let first_line = trimmed.lines().next().unwrap_or("").trim();
625        // TOML 섹션 헤더: [section] 또는 [[array]]
626        let is_toml_section = first_line.starts_with("[[")
627            || (first_line.starts_with('[')
628                && first_line.ends_with(']')
629                && !first_line.contains(',')
630                && first_line[1..first_line.len() - 1].chars().all(|c| {
631                    c.is_alphanumeric() || c == '_' || c == '-' || c == '.' || c == ' ' || c == '"'
632                }));
633        if is_toml_section {
634            return Ok((Format::Toml, None));
635        }
636        return Ok((Format::Json, None));
637    }
638
639    // XML: < 로 시작하는 태그 (<?xml 없이 바로 태그로 시작하는 경우)
640    if trimmed.starts_with('<') {
641        return Ok((Format::Xml, None));
642    }
643
644    // TSV: 첫째 줄에 탭이 포함되어 있으면 TSV로 간주
645    if let Some(first_line) = trimmed.lines().next() {
646        if first_line.contains('\t') {
647            return Ok((Format::Csv, Some('\t')));
648        }
649    }
650
651    // ENV: KEY=VALUE 패턴 (대문자 키, = 주변에 공백 없음)
652    // TOML과 구별: TOML은 " = " (공백 포함), ENV는 "KEY=value" (공백 없음, 대문자)
653    let first_line = trimmed.lines().next().unwrap_or("");
654    let ft = first_line.trim();
655    let env_line = ft.strip_prefix("export ").unwrap_or(ft);
656    if let Some(eq_pos) = env_line.find('=') {
657        let key_part = env_line[..eq_pos].trim();
658        if !key_part.is_empty()
659            && !key_part.contains(' ')
660            && key_part
661                .chars()
662                .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
663        {
664            // 여러 줄이 모두 ENV 패턴인지 확인
665            let env_lines = trimmed
666                .lines()
667                .filter(|l| {
668                    let t = l.trim();
669                    !t.is_empty() && !t.starts_with('#')
670                })
671                .take(5);
672            let all_env = env_lines.clone().all(|l| {
673                let l = l.trim().strip_prefix("export ").unwrap_or(l.trim());
674                if let Some(p) = l.find('=') {
675                    let k = l[..p].trim();
676                    !k.is_empty()
677                        && !k.contains(' ')
678                        && k.chars()
679                            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
680                } else {
681                    false
682                }
683            });
684            if all_env {
685                return Ok((Format::Env, None));
686            }
687        }
688    }
689
690    // TOML: key = value 패턴 (섹션 헤더는 위에서 처리됨)
691    if ft.contains(" = ") {
692        return Ok((Format::Toml, None));
693    }
694
695    // YAML: --- 또는 key: value 패턴
696    if ft.starts_with("---") || ft.contains(": ") || ft.ends_with(':') {
697        return Ok((Format::Yaml, None));
698    }
699
700    // CSV: 콤마가 포함된 구조적 데이터
701    if ft.contains(',') {
702        return Ok((Format::Csv, None));
703    }
704
705    Err(DkitError::FormatDetectionFailed(
706        "could not determine format from content".to_string(),
707    ))
708}
709
/// Returns the default delimiter implied by a file's extension.
///
/// `.tsv` files use a tab delimiter; the extension is compared
/// case-insensitively, so `.TSV` is handled the same way. Every other path
/// (including one without an extension) yields `None`, meaning no specific
/// delimiter is implied.
pub fn default_delimiter(path: &Path) -> Option<char> {
    let ext = path.extension()?.to_str()?;
    if ext.eq_ignore_ascii_case("tsv") {
        Some('\t')
    } else {
        None
    }
}
718
/// Returns the default delimiter implied by a `--to` format string.
///
/// Only `"tsv"` (compared case-insensitively) implies a delimiter, namely a
/// tab; every other format string yields `None`.
pub fn default_delimiter_for_format(format_str: &str) -> Option<char> {
    let wants_tab = format_str.to_lowercase() == "tsv";
    if wants_tab {
        Some('\t')
    } else {
        None
    }
}
726
/// Format-specific options controlling how data is read or written.
///
/// Use [`Default::default()`] to get sensible defaults.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// CSV delimiter (default: ',')
    pub delimiter: Option<char>,
    /// CSV no-header mode
    pub no_header: bool,
    /// Pretty-print output
    pub pretty: bool,
    /// Compact output (JSON)
    pub compact: bool,
    /// YAML inline/flow style
    pub flow_style: bool,
    /// XML root element name (default: "root")
    pub root_element: Option<String>,
    /// Include inline CSS styles in HTML output
    pub styled: bool,
    /// Emit a complete HTML document
    pub full_html: bool,
    /// JSON indentation setting (a number: count of spaces, "tab": tab character)
    pub indent: Option<String>,
    /// Sort JSON object keys alphabetically
    pub sort_keys: bool,
    /// Inline template string for template output
    pub template: Option<String>,
    /// File path for template output
    pub template_file: Option<String>,
}
757
758impl Default for FormatOptions {
759    fn default() -> Self {
760        Self {
761            delimiter: None,
762            no_header: false,
763            pretty: true,
764            compact: false,
765            flow_style: false,
766            root_element: None,
767            styled: false,
768            full_html: false,
769            indent: None,
770            sort_keys: false,
771            template: None,
772            template_file: None,
773        }
774    }
775}
776
/// Trait for reading a data format into a [`Value`].
///
/// Implement this trait to add support for reading a new data format.
///
/// Note: `read_from_reader` takes `impl Read`, which makes it a generic
/// method, so this trait cannot be used as a trait object
/// (`dyn FormatReader`).
#[allow(dead_code)]
pub trait FormatReader {
    /// Parse the given string content and return a [`Value`].
    ///
    /// Returns an error when the implementation cannot produce a value from
    /// `input`.
    fn read(&self, input: &str) -> anyhow::Result<Value>;

    /// Parse data from an [`io::Read`](std::io::Read) source and return a [`Value`].
    ///
    /// The reader is taken by value.
    fn read_from_reader(&self, reader: impl Read) -> anyhow::Result<Value>;
}
788
/// Trait for writing a [`Value`] to a data format.
///
/// Implement this trait to add support for writing a new data format.
///
/// Note: `write_to_writer` takes `impl Write`, which makes it a generic
/// method, so this trait cannot be used as a trait object
/// (`dyn FormatWriter`).
#[allow(dead_code)]
pub trait FormatWriter {
    /// Serialize the given [`Value`] and return the formatted string.
    ///
    /// Returns an error when the implementation cannot serialize `value`.
    fn write(&self, value: &Value) -> anyhow::Result<String>;

    /// Serialize the given [`Value`] and write to an [`io::Write`](std::io::Write) destination.
    ///
    /// The writer is taken by value.
    fn write_to_writer(&self, value: &Value, writer: impl Write) -> anyhow::Result<()>;
}
800
801#[cfg(test)]
802mod tests {
803    use super::*;
804    use std::path::PathBuf;
805
806    // --- Format::from_str ---
807
808    #[test]
809    fn test_format_from_str() {
810        assert_eq!(Format::from_str("json").unwrap(), Format::Json);
811        assert_eq!(Format::from_str("JSON").unwrap(), Format::Json);
812        assert_eq!(Format::from_str("csv").unwrap(), Format::Csv);
813        assert_eq!(Format::from_str("tsv").unwrap(), Format::Csv);
814        assert_eq!(Format::from_str("TSV").unwrap(), Format::Csv);
815        assert_eq!(Format::from_str("yaml").unwrap(), Format::Yaml);
816        assert_eq!(Format::from_str("yml").unwrap(), Format::Yaml);
817        assert_eq!(Format::from_str("toml").unwrap(), Format::Toml);
818    }
819
820    #[test]
821    fn test_format_from_str_jsonl() {
822        assert_eq!(Format::from_str("jsonl").unwrap(), Format::Jsonl);
823        assert_eq!(Format::from_str("jsonlines").unwrap(), Format::Jsonl);
824        assert_eq!(Format::from_str("ndjson").unwrap(), Format::Jsonl);
825        assert_eq!(Format::from_str("JSONL").unwrap(), Format::Jsonl);
826    }
827
828    #[test]
829    fn test_format_from_str_xml() {
830        assert_eq!(Format::from_str("xml").unwrap(), Format::Xml);
831    }
832
833    #[test]
834    fn test_format_from_str_msgpack() {
835        assert_eq!(Format::from_str("msgpack").unwrap(), Format::Msgpack);
836        assert_eq!(Format::from_str("messagepack").unwrap(), Format::Msgpack);
837    }
838
839    #[test]
840    fn test_format_from_str_markdown() {
841        assert_eq!(Format::from_str("md").unwrap(), Format::Markdown);
842        assert_eq!(Format::from_str("markdown").unwrap(), Format::Markdown);
843        assert_eq!(Format::from_str("MD").unwrap(), Format::Markdown);
844    }
845
846    #[test]
847    fn test_format_from_str_unknown() {
848        let err = Format::from_str("bin").unwrap_err();
849        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
850    }
851
852    // --- Format::Display ---
853
854    #[test]
855    fn test_format_display() {
856        assert_eq!(Format::Json.to_string(), "JSON");
857        assert_eq!(Format::Csv.to_string(), "CSV");
858        assert_eq!(Format::Yaml.to_string(), "YAML");
859        assert_eq!(Format::Toml.to_string(), "TOML");
860        assert_eq!(Format::Jsonl.to_string(), "JSONL");
861        assert_eq!(Format::Xml.to_string(), "XML");
862        assert_eq!(Format::Msgpack.to_string(), "MessagePack");
863        assert_eq!(Format::Markdown.to_string(), "Markdown");
864        assert_eq!(Format::Table.to_string(), "Table");
865    }
866
867    #[test]
868    fn test_format_from_str_table() {
869        assert_eq!(Format::from_str("table").unwrap(), Format::Table);
870        assert_eq!(Format::from_str("TABLE").unwrap(), Format::Table);
871    }
872
873    #[test]
874    fn test_list_output_formats() {
875        let formats = Format::list_output_formats();
876        assert!(formats.len() >= 10);
877        assert!(formats.iter().any(|(name, _)| *name == "table"));
878        assert!(formats.iter().any(|(name, _)| *name == "json"));
879    }
880
881    // --- detect_format ---
882
883    #[test]
884    fn test_detect_format_json() {
885        assert_eq!(
886            detect_format(&PathBuf::from("data.json")).unwrap(),
887            Format::Json
888        );
889    }
890
891    #[test]
892    fn test_detect_format_csv_tsv() {
893        assert_eq!(
894            detect_format(&PathBuf::from("data.csv")).unwrap(),
895            Format::Csv
896        );
897        assert_eq!(
898            detect_format(&PathBuf::from("data.tsv")).unwrap(),
899            Format::Csv
900        );
901    }
902
903    #[test]
904    fn test_detect_format_yaml() {
905        assert_eq!(
906            detect_format(&PathBuf::from("data.yaml")).unwrap(),
907            Format::Yaml
908        );
909        assert_eq!(
910            detect_format(&PathBuf::from("data.yml")).unwrap(),
911            Format::Yaml
912        );
913    }
914
915    #[test]
916    fn test_detect_format_toml() {
917        assert_eq!(
918            detect_format(&PathBuf::from("config.toml")).unwrap(),
919            Format::Toml
920        );
921    }
922
923    #[test]
924    fn test_detect_format_jsonl() {
925        assert_eq!(
926            detect_format(&PathBuf::from("data.jsonl")).unwrap(),
927            Format::Jsonl
928        );
929        assert_eq!(
930            detect_format(&PathBuf::from("data.ndjson")).unwrap(),
931            Format::Jsonl
932        );
933    }
934
935    #[test]
936    fn test_detect_format_xml() {
937        assert_eq!(
938            detect_format(&PathBuf::from("data.xml")).unwrap(),
939            Format::Xml
940        );
941    }
942
943    #[test]
944    fn test_detect_format_msgpack() {
945        assert_eq!(
946            detect_format(&PathBuf::from("data.msgpack")).unwrap(),
947            Format::Msgpack
948        );
949    }
950
951    #[test]
952    fn test_detect_format_markdown() {
953        assert_eq!(
954            detect_format(&PathBuf::from("output.md")).unwrap(),
955            Format::Markdown
956        );
957    }
958
959    #[test]
960    fn test_detect_format_unknown_ext() {
961        let err = detect_format(&PathBuf::from("data.bin")).unwrap_err();
962        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "bin"));
963    }
964
965    #[test]
966    fn test_detect_format_no_extension() {
967        let err = detect_format(&PathBuf::from("Makefile")).unwrap_err();
968        assert!(matches!(err, DkitError::UnknownFormat(s) if s == "(no extension)"));
969    }
970
    // --- default_delimiter / default_delimiter_for_format ---
974
975    #[test]
976    fn test_default_delimiter_tsv() {
977        assert_eq!(default_delimiter(&PathBuf::from("data.tsv")), Some('\t'));
978    }
979
980    #[test]
981    fn test_default_delimiter_csv() {
982        assert_eq!(default_delimiter(&PathBuf::from("data.csv")), None);
983    }
984
985    #[test]
986    fn test_default_delimiter_json() {
987        assert_eq!(default_delimiter(&PathBuf::from("data.json")), None);
988    }
989
990    #[test]
991    fn test_default_delimiter_for_format_tsv() {
992        assert_eq!(default_delimiter_for_format("tsv"), Some('\t'));
993        assert_eq!(default_delimiter_for_format("TSV"), Some('\t'));
994    }
995
996    #[test]
997    fn test_default_delimiter_for_format_csv() {
998        assert_eq!(default_delimiter_for_format("csv"), None);
999    }
1000
1001    // --- FormatOptions ---
1002
1003    #[test]
1004    fn test_format_options_default() {
1005        let opts = FormatOptions::default();
1006        assert_eq!(opts.delimiter, None);
1007        assert!(!opts.no_header);
1008        assert!(opts.pretty);
1009        assert!(!opts.compact);
1010        assert!(!opts.flow_style);
1011        assert_eq!(opts.root_element, None);
1012    }
1013
1014    // --- detect_format_from_content ---
1015
1016    #[test]
1017    fn test_sniff_xml_declaration() {
1018        let (fmt, delim) = detect_format_from_content("<?xml version=\"1.0\"?>\n<root/>").unwrap();
1019        assert_eq!(fmt, Format::Xml);
1020        assert_eq!(delim, None);
1021    }
1022
1023    #[test]
1024    fn test_sniff_xml_tag() {
1025        let (fmt, _) = detect_format_from_content("<root><item>hello</item></root>").unwrap();
1026        assert_eq!(fmt, Format::Xml);
1027    }
1028
1029    #[test]
1030    fn test_sniff_json_object() {
1031        let (fmt, _) = detect_format_from_content("{\"name\": \"Alice\"}").unwrap();
1032        assert_eq!(fmt, Format::Json);
1033    }
1034
1035    #[test]
1036    fn test_sniff_json_array() {
1037        let (fmt, _) = detect_format_from_content("[1, 2, 3]").unwrap();
1038        assert_eq!(fmt, Format::Json);
1039    }
1040
1041    #[test]
1042    fn test_sniff_jsonl() {
1043        let content = "{\"name\": \"Alice\"}\n{\"name\": \"Bob\"}\n";
1044        let (fmt, _) = detect_format_from_content(content).unwrap();
1045        assert_eq!(fmt, Format::Jsonl);
1046    }
1047
1048    #[test]
1049    fn test_sniff_tsv() {
1050        let content = "name\tage\tcity\nAlice\t30\tSeoul\n";
1051        let (fmt, delim) = detect_format_from_content(content).unwrap();
1052        assert_eq!(fmt, Format::Csv);
1053        assert_eq!(delim, Some('\t'));
1054    }
1055
1056    #[test]
1057    fn test_sniff_toml_section() {
1058        let content = "[database]\nhost = \"localhost\"\nport = 5432\n";
1059        let (fmt, _) = detect_format_from_content(content).unwrap();
1060        assert_eq!(fmt, Format::Toml);
1061    }
1062
1063    #[test]
1064    fn test_sniff_toml_key_value() {
1065        let content = "title = \"My App\"\nversion = \"1.0\"\n";
1066        let (fmt, _) = detect_format_from_content(content).unwrap();
1067        assert_eq!(fmt, Format::Toml);
1068    }
1069
1070    #[test]
1071    fn test_sniff_yaml_document() {
1072        let content = "---\nname: Alice\nage: 30\n";
1073        let (fmt, _) = detect_format_from_content(content).unwrap();
1074        assert_eq!(fmt, Format::Yaml);
1075    }
1076
1077    #[test]
1078    fn test_sniff_yaml_key_value() {
1079        let content = "name: Alice\nage: 30\n";
1080        let (fmt, _) = detect_format_from_content(content).unwrap();
1081        assert_eq!(fmt, Format::Yaml);
1082    }
1083
1084    #[test]
1085    fn test_sniff_csv() {
1086        let content = "name,age,city\nAlice,30,Seoul\n";
1087        let (fmt, delim) = detect_format_from_content(content).unwrap();
1088        assert_eq!(fmt, Format::Csv);
1089        assert_eq!(delim, None);
1090    }
1091
1092    #[test]
1093    fn test_sniff_empty_content() {
1094        let err = detect_format_from_content("").unwrap_err();
1095        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1096    }
1097
1098    #[test]
1099    fn test_sniff_whitespace_only() {
1100        let err = detect_format_from_content("   \n  \n").unwrap_err();
1101        assert!(matches!(err, DkitError::FormatDetectionFailed(_)));
1102    }
1103}