ktav 0.3.1

Ktav — a plain configuration format. Three rules, zero indentation, zero quoting. Serde-native.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
//! The unified `Error` enum plus its std / serde `Error` trait impls.
//!
//! ## Structured vs free-form errors
//!
//! Historically every parse failure was funneled into `Error::Syntax(String)`
//! and callers (notably `editor/lsp/src/diagnostics.rs` and the seven
//! non-Rust bindings) recovered structure (line, column, category) by
//! re-parsing the formatted message with regexes. Starting with `0.1.5`
//! the parser emits `Error::Structured(ErrorKind)` instead — every call
//! site retains `(line, column, kind)` so callers no longer have to
//! reverse-engineer it. `0.1.6` extends every variant with a [`Span`]
//! describing the exact byte range in the source input, and adds three
//! new variants (`UnbalancedBracket`, `InlineNonEmptyCompound`,
//! `MissingSeparator`) promoted out of `ErrorKind::Other`.
//!
//! `Error::Syntax(String)` is **kept** as a public variant for backward
//! compatibility with `0.1.x` callers that pattern-match on the string.
//! The parser itself never constructs it any more, but external code
//! (tests, downstream wrappers) is free to.
//!
//! ## Display parity (the contract)
//!
//! `<ErrorKind as Display>` is *byte-identical* to what the matching
//! `Error::Syntax(format!(…))` site produced in `0.1.4` for the seven
//! categories pinned in `tests/error_format.rs`. The three new
//! categories added in `0.1.6` get their own pinned strings in the same
//! file.
//!
//! ## Forward compatibility
//!
//! Both `Error` and the three error-kind enums are `#[non_exhaustive]`
//! so future additive variants don't constitute a breaking change.

use std::fmt::{self, Display};
use std::io;

/// Half-open byte range (`start..end`, Rust convention) into the
/// original input string. When `start == end` the span covers no bytes
/// and merely marks an insertion point.
///
/// [`Span::slice`] extracts the covered substring; [`Span::line_col`]
/// translates the span's start into a 1-based line and a 0-based
/// column (the column is a byte count from the start of that line).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the first covered byte (inclusive).
    pub start: u32,
    /// Byte offset one past the last covered byte (exclusive).
    pub end: u32,
}

impl Span {
    /// Placeholder span covering nothing at offset zero. Used for
    /// `Other`-class internal-state errors where no source range is
    /// meaningful.
    pub const EMPTY: Span = Span { start: 0, end: 0 };

    /// Build a span from raw offsets. Nothing is validated here; the
    /// caller is expected to ensure `start <= end` and that both
    /// offsets lie within `input.len()`.
    pub const fn new(start: u32, end: u32) -> Self {
        Span { start, end }
    }

    /// Borrow the text this span covers from `input`.
    ///
    /// Yields `None` when the range is inverted (`start > end`), when
    /// it reaches past the end of `input`, or when either offset does
    /// not sit on a UTF-8 character boundary.
    pub fn slice<'a>(&self, input: &'a str) -> Option<&'a str> {
        // `str::get` performs the ordering, bounds and char-boundary
        // checks in one go and never panics.
        let range = (self.start as usize)..(self.end as usize);
        input.get(range)
    }

    /// Locate this span's `start` offset inside `input` as
    /// `(line, column)`: the line is 1-based, the column is the
    /// 0-based byte distance from the start of that line.
    ///
    /// Only `\n` terminates a line; a `\r` stays on the line it
    /// precedes the break of. A `start` beyond the end of `input` is
    /// clamped to `input.len()`.
    pub fn line_col(&self, input: &str) -> (u32, u32) {
        let offset = usize::min(self.start as usize, input.len());
        let prefix = &input.as_bytes()[..offset];

        // The current line begins right after the last newline seen
        // before `offset`, or at byte 0 when there is none.
        let line_start = prefix
            .iter()
            .rposition(|&byte| byte == b'\n')
            .map_or(0, |newline_at| newline_at + 1);
        let newlines_before = prefix.iter().filter(|&&byte| byte == b'\n').count();

        let line = 1 + newlines_before as u32;
        let column = (offset - line_start) as u32;
        (line, column)
    }
}

/// The single error type returned by every entry point.
///
/// `Display` prefixes `Io` with `I/O error: ` and both `Structured` and
/// `Syntax` with `Syntax error: `; `Message` is printed verbatim. The
/// `line()` / `span()` accessors on this type return data only for
/// `Structured`.
///
/// `#[non_exhaustive]`, so `match`es in downstream crates need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
    /// I/O error while reading from disk. Also what the
    /// `From<io::Error>` conversion produces.
    Io(io::Error),
    /// Structured parse error — preferred form, emitted by the parser
    /// since `0.1.5`. Carries `(line, [span], kind)` for callers that
    /// need to drive IDE diagnostics, pretty-printers, etc.
    Structured(ErrorKind),
    /// Free-form syntax error (legacy). The parser no longer constructs
    /// this directly; new code should prefer `Error::Structured`. Kept
    /// public for `0.1.x` backward-compat.
    ///
    /// The message may carry a category prefix in the form
    /// `"Line N: Category: ..."` for specific error classes such as
    /// `InvalidTypedScalar`.
    Syntax(String),
    /// A custom message produced by `serde` during (de)serialization —
    /// for example a type mismatch or a missing field. Built by the
    /// `serde::ser::Error::custom` / `serde::de::Error::custom` impls.
    Message(String),
}

/// Structured parse-error category. Each variant carries the
/// information the parser had at the point of failure; `Display` for
/// `ErrorKind` reproduces byte-for-byte the string the legacy
/// `Error::Syntax(format!(…))` call site produced in `0.1.4` for the
/// seven categories that existed then; the three new categories added
/// in `0.1.6` (`UnbalancedBracket`, `InlineNonEmptyCompound`,
/// `MissingSeparator`) have their own pinned Display strings.
///
/// Every variant carries a [`Span`]. Every variant except
/// `UnclosedCompound` also carries a 1-based `line` (optional for
/// `Other`).
///
/// `#[non_exhaustive]` so future kinds can be added without breakage.
#[non_exhaustive]
// NOTE(review): presumably here because not every variant field has its
// own doc comment — confirm whether the crate enables `missing_docs`.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ErrorKind {
    /// A separator (`:`, `::`, `:i`, `:f`) is glued to its body without
    /// the mandatory whitespace / end-of-line. Spec § 5.3 / § 5.4.
    ///
    /// `column` is the 0-based column inside the line; `span` covers
    /// the offending separator-and-glued-body region.
    MissingSeparatorSpace {
        /// 1-based line number of the offending line.
        line: u32,
        /// 0-based column inside the line. Not part of the `Display`
        /// output.
        column: u32,
        /// Marker character of the offending separator. Not part of
        /// the `Display` output.
        /// NOTE(review): the exact set of values is decided by the
        /// parser — confirm there.
        marker: char,
        /// Byte range of the separator-and-glued-body region.
        span: Span,
    },
    /// The body following a `:i` / `:f` typed-scalar marker is not a
    /// valid integer / float literal (or is empty / opens a compound).
    /// `body` is the human-readable detail (e.g. `"integer body is empty"`
    /// or `"'abc' is not a valid integer literal for `:i`"`).
    InvalidTypedScalar {
        /// 1-based line number of the offending line.
        line: u32,
        /// Typed-scalar marker character (presumably `i` or `f` —
        /// confirm against the parser). Not part of the `Display`
        /// output.
        marker: char,
        /// Human-readable detail; `Display` prints it after the
        /// `InvalidTypedScalar: ` category prefix.
        body: String,
        /// Byte range associated with the failure.
        span: Span,
    },
    /// Two pairs in the same object share the same key. `key` is the
    /// repeated key as quoted in the `Display` message.
    DuplicateKey { line: u32, key: String, span: Span },
    /// A dotted-key insertion clashed with an existing entry. `kind`
    /// distinguishes the three concrete shapes the parser observes.
    KeyPathConflict {
        /// 1-based line number of the offending line.
        line: u32,
        /// Key path at which the conflict is reported; quoted in the
        /// `Display` message.
        path: String,
        /// Which shape of conflict occurred; selects the tail of the
        /// `Display` message.
        kind: ConflictKind,
        /// Byte range associated with the failure.
        span: Span,
    },
    /// `: value` — the line had no key before the separator.
    EmptyKey { line: u32, span: Span },
    /// A key contained an invalid byte (whitespace, `[`, `]`, `{`, `}`,
    /// `:`, `#`) or was empty after segmentation. `key` is the
    /// offending key text as quoted in the `Display` message.
    InvalidKey { line: u32, key: String, span: Span },
    /// The document ended while a compound (object, array, multi-line
    /// string) was still open. `span` covers from the opener through
    /// EOF (or `Span::EMPTY` if the opener wasn't tracked).
    ///
    /// This is the only variant without a line number; `line()`
    /// returns `None` for it.
    UnclosedCompound { kind: CompoundKind, span: Span },
    /// A closer (`}` or `]`) appeared without a matching open compound,
    /// or with the wrong shape relative to the open one.
    /// `expected` records what was actually open (or could be opened),
    /// and `found` records the literal closer character that appeared.
    UnbalancedBracket {
        /// 1-based line number of the offending line.
        line: u32,
        /// Byte range associated with the failure.
        span: Span,
        /// Compound kind whose opener is named in the `Display`
        /// message (`{` for objects, `[` for arrays).
        expected: CompoundKind,
        /// The closer character that was actually seen.
        found: char,
    },
    /// A `key: { … }` / `key: [ … ]` / array-item line tried to fit a
    /// non-empty compound on a single line. Spec § 6.7 forbids this —
    /// every entry / item must be on its own line.
    ///
    /// `body` is interpolated into the `Display` message right after
    /// `inline non-empty ` (presumably the compound's name, e.g.
    /// `object` / `array` — confirm against the parser).
    InlineNonEmptyCompound { line: u32, span: Span, body: String },
    /// A non-blank, non-comment, non-closer, non-array-item line lacks
    /// the `:` that would make it a `key: value` pair. The whole
    /// trimmed line is the offending span.
    MissingSeparator { line: u32, span: Span },
    /// Escape hatch for parser failures that don't map to one of the
    /// canonical categories — currently only parser-internal invariant
    /// violations (`pending key already set`, `closed compound without
    /// pending key`, `multi-line string closed without pending key`).
    Other {
        /// 1-based line number, when one is known.
        line: Option<u32>,
        /// Complete message; `Display` writes it out unchanged.
        message: String,
        /// Byte range associated with the failure.
        span: Span,
    },
}

/// Sub-classification for `ErrorKind::KeyPathConflict`.
///
/// Each variant selects a different tail for the conflict message
/// produced by `ErrorKind`'s `Display` impl.
///
/// `#[non_exhaustive]` so future conflict shapes can be added without
/// breaking pattern-match exhaustiveness in downstream crates.
#[non_exhaustive]
// NOTE(review): presumably here because not every item below has its
// own doc comment — confirm whether the crate enables `missing_docs`.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConflictKind {
    /// Existing scalar/array would have to be overwritten with an
    /// object-shaped value (or vice versa).
    Overwrite {
        /// Description of the value already present; rendered as the
        /// first operand of `cannot overwrite … with …`.
        existing: &'static str,
        /// Description of the value that would replace it; rendered
        /// as the second operand of `cannot overwrite … with …`.
        new_kind: &'static str,
    },
    /// A dotted-key tried to descend into / through an existing scalar
    /// (`db: 1` then `db.x: 2`).
    BlockedByValue,
    /// A synthetic dotted-key prefix was re-opened after an
    /// intervening different prefix had closed it (event-stream
    /// path only).
    SyntheticReopen,
}

/// Sub-classification for `ErrorKind::UnclosedCompound` and
/// `ErrorKind::UnbalancedBracket`.
///
/// `#[non_exhaustive]` so future compound flavours can be added
/// without breakage.
#[non_exhaustive]
// NOTE(review): presumably here because the variants carried no doc
// comments — confirm whether the crate enables `missing_docs`.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompoundKind {
    /// An object; its opener is rendered as `{` in `UnbalancedBracket`
    /// messages.
    Object,
    /// An array; its opener is rendered as `[` in `UnbalancedBracket`
    /// messages.
    Array,
    /// A multi-line string. `Display` does not distinguish it from
    /// `MultilineVerbatim`.
    /// NOTE(review): presumably the flavour whose content has
    /// indentation stripped — confirm against the spec.
    MultilineStripped,
    /// A multi-line string. `Display` does not distinguish it from
    /// `MultilineStripped`.
    /// NOTE(review): presumably the flavour whose content is kept
    /// as written — confirm against the spec.
    MultilineVerbatim,
}

impl Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ErrorKind::MissingSeparatorSpace { line, .. } => write!(
                f,
                "Line {}: MissingSeparatorSpace: separator must be followed by whitespace or end of line",
                line
            ),
            ErrorKind::InvalidTypedScalar { line, body, .. } => {
                write!(f, "Line {}: InvalidTypedScalar: {}", line, body)
            }
            ErrorKind::DuplicateKey { line, key, .. } => {
                write!(f, "Line {}: duplicate key '{}'", line, key)
            }
            ErrorKind::KeyPathConflict { line, path, kind, .. } => match kind {
                ConflictKind::Overwrite { existing, new_kind } => write!(
                    f,
                    "Line {}: conflict at '{}' \u{2014} cannot overwrite {} with {}",
                    line, path, existing, new_kind
                ),
                ConflictKind::BlockedByValue => write!(
                    f,
                    "Line {}: conflict at '{}' \u{2014} an existing value blocks the path",
                    line, path
                ),
                ConflictKind::SyntheticReopen => write!(
                    f,
                    "Line {}: conflict at '{}' \u{2014} synthetic dotted-key prefix already closed by an intervening different prefix; group lines with the same prefix together",
                    line, path
                ),
            },
            ErrorKind::EmptyKey { line, .. } => write!(f, "Empty key at line {}", line),
            ErrorKind::InvalidKey { line, key, .. } => {
                write!(f, "Invalid key at line {}: '{}'", line, key)
            }
            ErrorKind::UnclosedCompound { kind, .. } => match kind {
                CompoundKind::Object => write!(f, "Unclosed object at end of input"),
                CompoundKind::Array => write!(f, "Unclosed array at end of input"),
                CompoundKind::MultilineStripped | CompoundKind::MultilineVerbatim => {
                    write!(f, "Unclosed multi-line string at end of input")
                }
            },
            ErrorKind::UnbalancedBracket {
                line,
                expected,
                found,
                ..
            } => {
                let opener = match expected {
                    CompoundKind::Object => '{',
                    CompoundKind::Array => '[',
                    // Multi-line markers can't be opened by `{` / `[`,
                    // but if a future caller wires this up we still
                    // produce sensible output.
                    CompoundKind::MultilineStripped | CompoundKind::MultilineVerbatim => '(',
                };
                write!(
                    f,
                    "Line {}: UnbalancedBracket: '{}' without matching '{}'",
                    line, found, opener
                )
            }
            ErrorKind::InlineNonEmptyCompound { line, body, .. } => write!(
                f,
                "Line {}: InlineNonEmptyCompound: inline non-empty {} is not supported; put entries on separate lines",
                line, body
            ),
            ErrorKind::MissingSeparator { line, .. } => write!(
                f,
                "Line {}: MissingSeparator: object entries must be 'key: value' pairs",
                line
            ),
            ErrorKind::Other { message, .. } => f.write_str(message),
        }
    }
}

impl Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::Io(e) => write!(f, "I/O error: {}", e),
            Error::Structured(k) => write!(f, "Syntax error: {}", k),
            Error::Syntax(m) => write!(f, "Syntax error: {}", m),
            Error::Message(m) => write!(f, "{}", m),
        }
    }
}

// Marker impl so `Error` works with `Box<dyn std::error::Error>` and
// friends. `source()` keeps the trait default (`None`).
// NOTE(review): presumably intentional, since `Display` for the `Io`
// variant already embeds the inner error's message — confirm.
impl std::error::Error for Error {}

impl From<io::Error> for Error {
    fn from(e: io::Error) -> Self {
        Error::Io(e)
    }
}

/// Lets serde report serialization failures through this crate's
/// `Error`: the message is rendered to a `String` and stored in
/// `Error::Message`.
impl serde::ser::Error for Error {
    fn custom<T>(msg: T) -> Self
    where
        T: Display,
    {
        let rendered = msg.to_string();
        Self::Message(rendered)
    }
}

/// Lets serde report deserialization failures through this crate's
/// `Error`: the message is rendered to a `String` and stored in
/// `Error::Message`.
impl serde::de::Error for Error {
    fn custom<T>(msg: T) -> Self
    where
        T: Display,
    {
        let rendered = msg.to_string();
        Self::Message(rendered)
    }
}

// ---------------------------------------------------------------------------
// Convenience accessors on `Error`
// ---------------------------------------------------------------------------

impl Error {
    /// Returns the 1-based line number associated with the error, if
    /// available. `None` for [`Error::Io`], [`Error::Message`], free-
    /// form [`Error::Syntax`], EOF-detected `UnclosedCompound`, and
    /// the parser-internal `Other` variants that lack a line number.
    pub fn line(&self) -> Option<u32> {
        match self {
            Error::Structured(k) => k.line(),
            _ => None,
        }
    }

    /// Returns the byte-offset span associated with the error, if
    /// available. `None` for [`Error::Io`], [`Error::Message`] and
    /// [`Error::Syntax`]. May return `Some(Span::EMPTY)` for an
    /// internal-state structured error that has no meaningful source
    /// range.
    pub fn span(&self) -> Option<Span> {
        match self {
            Error::Structured(k) => Some(k.span()),
            _ => None,
        }
    }
}

impl ErrorKind {
    /// 1-based line number, or `None` for variants where the failure
    /// is detected at EOF or carries no line context.
    pub fn line(&self) -> Option<u32> {
        match self {
            ErrorKind::MissingSeparatorSpace { line, .. }
            | ErrorKind::InvalidTypedScalar { line, .. }
            | ErrorKind::DuplicateKey { line, .. }
            | ErrorKind::KeyPathConflict { line, .. }
            | ErrorKind::EmptyKey { line, .. }
            | ErrorKind::InvalidKey { line, .. }
            | ErrorKind::UnbalancedBracket { line, .. }
            | ErrorKind::InlineNonEmptyCompound { line, .. }
            | ErrorKind::MissingSeparator { line, .. } => Some(*line),
            ErrorKind::UnclosedCompound { .. } => None,
            ErrorKind::Other { line, .. } => *line,
        }
    }

    /// Byte-offset span covering the offending source region.
    pub fn span(&self) -> Span {
        match self {
            ErrorKind::MissingSeparatorSpace { span, .. }
            | ErrorKind::InvalidTypedScalar { span, .. }
            | ErrorKind::DuplicateKey { span, .. }
            | ErrorKind::KeyPathConflict { span, .. }
            | ErrorKind::EmptyKey { span, .. }
            | ErrorKind::InvalidKey { span, .. }
            | ErrorKind::UnclosedCompound { span, .. }
            | ErrorKind::UnbalancedBracket { span, .. }
            | ErrorKind::InlineNonEmptyCompound { span, .. }
            | ErrorKind::MissingSeparator { span, .. }
            | ErrorKind::Other { span, .. } => *span,
        }
    }
}