Skip to main content

tf_types/
yaml.rs

1//! TF-YAML: the strict YAML subset TrustForge parses and emits — in-house
2//! codec (see `docs/dependency-audit.md`), mirror of
3//! `tools/tf-types-ts/src/core/yaml.ts`. Read that file's doc comment for
4//! the full subset definition; the two implementations must stay
5//! semantically identical (verified by parsing every `.yaml` in the repo
6//! plus the conformance suites in both languages).
7//!
8//! Values parse into `serde_json::Value`: mapping keys are always
9//! strings, integers restrict to the ±2^53-1 safe range (larger digit
10//! runs stay strings, matching the TS/JSON number model), and the
11//! non-JSON floats `.inf`/`.nan` are rejected.
12//!
13//! Deliberately rejected (out of subset): anchors & aliases, tags,
14//! multi-document streams, complex (`? `) keys.
15
16use serde_json::{Map, Number, Value};
17use std::fmt;
18
/// Error raised by the TF-YAML codec.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct YamlError {
    /// Human-readable description of the problem.
    message: String,
    /// 0-based raw line, when known.
    line: Option<usize>,
}

impl YamlError {
    /// Build an error from any string-like `message` and an optional
    /// 0-based `line`.
    fn new(message: impl Into<String>, line: Option<usize>) -> Self {
        let message = message.into();
        Self { message, line }
    }
}

impl fmt::Display for YamlError {
    /// Renders `line N: message` (with `N` 1-based) when a line is known,
    /// otherwise just the message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(zero_based) = self.line {
            write!(f, "line {}: ", zero_based + 1)?;
        }
        f.write_str(&self.message)
    }
}

impl std::error::Error for YamlError {}
45
46const MAX_SAFE_INTEGER: i64 = 9_007_199_254_740_991; // 2^53 - 1
47
48/* ------------------------------------------------------------------ */
49/*  Scalar resolution (YAML 1.2 core schema)                           */
50/* ------------------------------------------------------------------ */
51
52fn resolve_scalar(text: &str) -> Result<Value, YamlError> {
53    match text {
54        "" | "~" | "null" | "Null" | "NULL" => return Ok(Value::Null),
55        "true" | "True" | "TRUE" => return Ok(Value::Bool(true)),
56        "false" | "False" | "FALSE" => return Ok(Value::Bool(false)),
57        ".nan" | ".NaN" | ".NAN" => {
58            return Err(YamlError::new("non-finite floats are not supported", None))
59        }
60        _ => {}
61    }
62    let bytes = text.as_bytes();
63    let digits = |s: &[u8]| !s.is_empty() && s.iter().all(u8::is_ascii_digit);
64    let unsigned = bytes
65        .strip_prefix(b"-")
66        .or_else(|| bytes.strip_prefix(b"+"));
67    let body = unsigned.unwrap_or(bytes);
68    if digits(body) {
69        if let Ok(n) = text.parse::<i64>() {
70            if n.abs() <= MAX_SAFE_INTEGER {
71                return Ok(Value::Number(n.into()));
72            }
73        }
74        return Ok(Value::String(text.to_string())); // overflow-sized digit runs
75    }
76    if let Some(hex) = text.strip_prefix("0x") {
77        if !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit()) {
78            if let Ok(n) = i64::from_str_radix(hex, 16) {
79                if n <= MAX_SAFE_INTEGER {
80                    return Ok(Value::Number(n.into()));
81                }
82            }
83            return Ok(Value::String(text.to_string()));
84        }
85    }
86    if let Some(oct) = text.strip_prefix("0o") {
87        if !oct.is_empty() && oct.bytes().all(|b| (b'0'..=b'7').contains(&b)) {
88            if let Ok(n) = i64::from_str_radix(oct, 8) {
89                if n <= MAX_SAFE_INTEGER {
90                    return Ok(Value::Number(n.into()));
91                }
92            }
93            return Ok(Value::String(text.to_string()));
94        }
95    }
96    if is_float_syntax(text) {
97        if let Ok(f) = text.parse::<f64>() {
98            if f.is_finite() {
99                if let Some(n) = Number::from_f64(f) {
100                    return Ok(Value::Number(n));
101                }
102            }
103        }
104    }
105    if matches!(
106        text,
107        ".inf" | ".Inf" | ".INF" | "-.inf" | "-.Inf" | "-.INF" | "+.inf" | "+.Inf" | "+.INF"
108    ) {
109        return Err(YamlError::new("non-finite floats are not supported", None));
110    }
111    Ok(Value::String(text.to_string()))
112}
113
/// Returns `true` when `text` matches
/// `[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?`
/// and is not a bare integer (a digit-only mantissa needs an exponent).
fn is_float_syntax(text: &str) -> bool {
    fn all_digits(s: &str) -> bool {
        s.bytes().all(|b| b.is_ascii_digit())
    }
    fn drop_sign(s: &str) -> &str {
        s.strip_prefix('-')
            .or_else(|| s.strip_prefix('+'))
            .unwrap_or(s)
    }

    let unsigned = drop_sign(text);
    let (mantissa, exponent) = match unsigned.split_once(['e', 'E']) {
        Some((m, e)) => (m, Some(e)),
        None => (unsigned, None),
    };

    let mantissa_ok = match mantissa.split_once('.') {
        // `.digits`: a leading dot needs at least one fractional digit.
        Some(("", frac)) => !frac.is_empty() && all_digits(frac),
        // `digits.` or `digits.digits`: the fraction may be empty.
        Some((int, frac)) => all_digits(int) && all_digits(frac),
        // Pure integers are handled elsewhere; only exponent forms count.
        None => exponent.is_some() && !mantissa.is_empty() && all_digits(mantissa),
    };

    mantissa_ok
        && exponent.map_or(true, |e| {
            let e = drop_sign(e);
            !e.is_empty() && all_digits(e)
        })
}
149
150/* ------------------------------------------------------------------ */
151/*  Parser                                                             */
152/* ------------------------------------------------------------------ */
153
/// One significant input line (blank lines, comment lines and document
/// markers are dropped before a `Line` is created).
#[derive(Clone)]
struct Line {
    /// Column of the first content character. Initially the number of
    /// leading spaces; `parse_sequence` rewrites it when it peels a `- `
    /// marker off the line.
    indent: usize,
    /// Line text with indentation and trailing whitespace removed.
    content: String,
    /// 0-based index of this line in the raw input.
    raw: usize,
}
160
/// Line-oriented parser state for block-style TF-YAML.
struct Parser {
    /// Every raw input line (trailing `\r` removed); block scalars read
    /// their body from here so blank and `#` lines are preserved.
    lines: Vec<String>,
    /// The significant lines, in input order.
    items: Vec<Line>,
    /// Index into `items` of the next unconsumed line.
    pos: usize,
}
166
167impl Parser {
168    fn new(input: &str) -> Result<Self, YamlError> {
169        let lines: Vec<String> = input
170            .split('\n')
171            .map(|l| l.trim_end_matches('\r').to_string())
172            .collect();
173        let mut items = Vec::new();
174        for (i, raw) in lines.iter().enumerate() {
175            let trimmed = raw.trim_end();
176            let indent = count_indent(trimmed);
177            if raw[..indent.min(raw.len())].contains('\t')
178                || raw.trim_start_matches(' ').starts_with('\t') && raw.trim().is_empty()
179            {
180                return Err(YamlError::new(
181                    "tabs are not allowed in indentation",
182                    Some(i),
183                ));
184            }
185            let content = &trimmed[indent..];
186            if content.is_empty() || content.starts_with('#') {
187                continue;
188            }
189            if content == "---" {
190                if items.is_empty() {
191                    continue; // leading document marker
192                }
193                return Err(YamlError::new(
194                    "multi-document streams are not supported",
195                    Some(i),
196                ));
197            }
198            if content == "..." {
199                continue;
200            }
201            items.push(Line {
202                indent,
203                content: content.to_string(),
204                raw: i,
205            });
206        }
207        Ok(Self {
208            lines,
209            items,
210            pos: 0,
211        })
212    }
213
214    fn parse(mut self) -> Result<Value, YamlError> {
215        if self.items.is_empty() {
216            return Ok(Value::Null);
217        }
218        let value = self.parse_node(0)?;
219        if self.pos < self.items.len() {
220            return Err(YamlError::new(
221                "unexpected content",
222                Some(self.items[self.pos].raw),
223            ));
224        }
225        Ok(value)
226    }
227
    /// The next unconsumed significant line, or `None` at end of input.
    fn peek(&self) -> Option<&Line> {
        self.items.get(self.pos)
    }
231
232    fn parse_node(&mut self, min_indent: usize) -> Result<Value, YamlError> {
233        let Some(line) = self.peek() else {
234            return Ok(Value::Null);
235        };
236        if line.indent < min_indent {
237            return Ok(Value::Null);
238        }
239        if line.content == "-" || line.content.starts_with("- ") {
240            let indent = line.indent;
241            return self.parse_sequence(indent);
242        }
243        if find_key(&line.content).is_some() {
244            let indent = line.indent;
245            return self.parse_mapping(indent);
246        }
247        self.parse_scalar_lines()
248    }
249
250    fn parse_sequence(&mut self, indent: usize) -> Result<Value, YamlError> {
251        let mut out = Vec::new();
252        while let Some(line) = self.peek() {
253            if line.indent != indent {
254                break;
255            }
256            if line.content == "-" {
257                self.pos += 1;
258                let deeper = self.peek().map(|n| n.indent > indent).unwrap_or(false);
259                out.push(if deeper {
260                    self.parse_node(indent + 1)?
261                } else {
262                    Value::Null
263                });
264                continue;
265            }
266            if !line.content.starts_with("- ") {
267                break;
268            }
269            // Rewrite `- rest` in place as deeper-indented content so
270            // nested structures parse naturally with true columns.
271            let rest = line.content[2..].to_string();
272            let raw = line.raw;
273            let extra = count_indent(&rest);
274            self.items[self.pos] = Line {
275                indent: indent + 2 + extra,
276                content: rest[extra..].to_string(),
277                raw,
278            };
279            out.push(self.parse_node(indent + 1)?);
280        }
281        Ok(Value::Array(out))
282    }
283
284    fn parse_mapping(&mut self, indent: usize) -> Result<Value, YamlError> {
285        let mut out = Map::new();
286        while let Some(line) = self.peek() {
287            if line.indent != indent {
288                break;
289            }
290            if line.content == "-" || line.content.starts_with("- ") {
291                break;
292            }
293            let Some((key, rest)) = find_key(&line.content) else {
294                break;
295            };
296            let raw = line.raw;
297            self.pos += 1;
298            let value = if rest.is_empty() {
299                let deeper = self.peek().map(|n| n.indent > indent).unwrap_or(false);
300                if deeper {
301                    self.parse_node(indent + 1)?
302                } else {
303                    Value::Null
304                }
305            } else if rest.starts_with('|') || rest.starts_with('>') {
306                Value::String(self.parse_block_scalar(&rest, indent, raw)?)
307            } else {
308                self.parse_inline_value(&rest, indent, raw)?
309            };
310            if out.contains_key(&key) {
311                return Err(YamlError::new(
312                    format!("duplicate mapping key {key:?}"),
313                    Some(raw),
314                ));
315            }
316            out.insert(key, value);
317        }
318        Ok(Value::Object(out))
319    }
320
321    fn parse_inline_value(
322        &mut self,
323        rest: &str,
324        indent: usize,
325        raw_line: usize,
326    ) -> Result<Value, YamlError> {
327        if rest.starts_with('[') || rest.starts_with('{') {
328            let text = self.collect_flow(rest, raw_line)?;
329            let mut flow = FlowParser {
330                s: text.as_bytes(),
331                text: &text,
332                i: 0,
333                raw_line,
334            };
335            let value = flow.parse_value()?;
336            flow.expect_end()?;
337            return Ok(value);
338        }
339        if rest.starts_with('"') || rest.starts_with('\'') {
340            let (value, end) = parse_quoted(rest)
341                .ok_or_else(|| YamlError::new("unterminated quoted scalar", Some(raw_line)))?;
342            let after = strip_comment(rest[end..].trim());
343            if !after.is_empty() {
344                return Err(YamlError::new(
345                    "unexpected content after quoted scalar",
346                    Some(raw_line),
347                ));
348            }
349            return Ok(Value::String(value));
350        }
351        if rest.starts_with('&') || rest.starts_with('*') {
352            return Err(YamlError::new(
353                "anchors and aliases are not supported (TF-YAML subset)",
354                Some(raw_line),
355            ));
356        }
357        if rest.starts_with('!') {
358            return Err(YamlError::new(
359                "tags are not supported (TF-YAML subset)",
360                Some(raw_line),
361            ));
362        }
363        // Plain scalar with folded continuation lines.
364        let mut text = strip_comment(rest).to_string();
365        while let Some(next) = self.peek() {
366            if next.indent <= indent {
367                break;
368            }
369            if next.content == "-" || next.content.starts_with("- ") {
370                break;
371            }
372            if find_key(&next.content).is_some() {
373                break;
374            }
375            text.push(' ');
376            text.push_str(strip_comment(&next.content));
377            self.pos += 1;
378        }
379        resolve_scalar(text.trim()).map_err(|e| YamlError::new(e.message, Some(raw_line)))
380    }
381
382    fn parse_scalar_lines(&mut self) -> Result<Value, YamlError> {
383        let first = self.peek().expect("caller checked").clone();
384        self.pos += 1;
385        self.parse_inline_value(&first.content, first.indent.saturating_sub(1), first.raw)
386    }
387
388    fn collect_flow(&mut self, first: &str, raw_line: usize) -> Result<String, YamlError> {
389        let mut text = strip_comment(first.trim()).to_string();
390        loop {
391            if flow_balanced(&text) {
392                return Ok(text);
393            }
394            let Some(next) = self.peek() else {
395                return Err(YamlError::new(
396                    "unterminated flow collection",
397                    Some(raw_line),
398                ));
399            };
400            let chunk = strip_comment(next.content.trim()).to_string();
401            self.pos += 1;
402            text.push(' ');
403            text.push_str(&chunk);
404        }
405    }
406
407    fn parse_block_scalar(
408        &mut self,
409        header: &str,
410        key_indent: usize,
411        raw_line: usize,
412    ) -> Result<String, YamlError> {
413        let folded = header.starts_with('>');
414        #[derive(PartialEq)]
415        enum Chomp {
416            Clip,
417            Strip,
418            Keep,
419        }
420        let mut chomp = Chomp::Clip;
421        let mut explicit_indent = None;
422        for c in strip_comment(header[1..].trim()).chars() {
423            match c {
424                '-' => chomp = Chomp::Strip,
425                '+' => chomp = Chomp::Keep,
426                '1'..='9' => explicit_indent = Some(key_indent + c.to_digit(10).unwrap() as usize),
427                _ => {
428                    return Err(YamlError::new(
429                        format!("bad block scalar header {header:?}"),
430                        Some(raw_line),
431                    ))
432                }
433            }
434        }
435
436        let start_raw = raw_line + 1;
437        let mut end_raw = start_raw;
438        for i in start_raw..self.lines.len() {
439            let l = &self.lines[i];
440            if l.trim().is_empty() {
441                continue;
442            }
443            if count_indent(l) <= key_indent {
444                break;
445            }
446            end_raw = i + 1;
447        }
448        let raw: Vec<&str> = (start_raw..end_raw)
449            .map(|i| self.lines[i].as_str())
450            .collect();
451        while self.pos < self.items.len() && self.items[self.pos].raw < end_raw {
452            self.pos += 1;
453        }
454
455        let mut block_indent = explicit_indent;
456        if block_indent.is_none() {
457            for l in &raw {
458                if !l.trim().is_empty() {
459                    block_indent = Some(count_indent(l));
460                    break;
461                }
462            }
463        }
464        let block_indent = block_indent.unwrap_or(key_indent + 1);
465        if block_indent <= key_indent {
466            return Err(YamlError::new(
467                "block scalar body must be indented past its key",
468                Some(raw_line),
469            ));
470        }
471
472        let body: Vec<String> = raw
473            .iter()
474            .map(|l| {
475                if l.trim().is_empty() {
476                    String::new()
477                } else {
478                    l[block_indent.min(count_indent(l))..].to_string()
479                }
480            })
481            .collect();
482        let mut end = body.len();
483        while end > 0 && body[end - 1].is_empty() {
484            end -= 1;
485        }
486        let kept = &body[..end];
487        let trailing_blank = body.len() - end;
488
489        let text = if !folded {
490            kept.join("\n")
491        } else {
492            let mut text = String::new();
493            let mut prev_was_text = false;
494            let mut prev_was_literal = false;
495            for l in kept {
496                let literal = !l.is_empty() && (l.starts_with(' ') || l.starts_with('\t'));
497                if l.is_empty() {
498                    text.push('\n');
499                    prev_was_text = false;
500                    prev_was_literal = false;
501                    continue;
502                }
503                if prev_was_text && !literal && !prev_was_literal {
504                    text.push(' ');
505                } else if prev_was_literal || (prev_was_text && literal) {
506                    text.push('\n');
507                }
508                text.push_str(l);
509                prev_was_text = true;
510                prev_was_literal = literal;
511            }
512            text
513        };
514        Ok(match chomp {
515            Chomp::Strip => text.trim_end_matches('\n').to_string(),
516            Chomp::Keep => {
517                if kept.is_empty() && trailing_blank == 0 {
518                    text
519                } else {
520                    text + &"\n".repeat(trailing_blank + 1)
521                }
522            }
523            Chomp::Clip => {
524                if text.is_empty() && trailing_blank == 0 {
525                    text
526                } else {
527                    text + "\n"
528                }
529            }
530        })
531    }
532}
533
/// Number of leading space characters in `s` (tabs do not count).
fn count_indent(s: &str) -> usize {
    s.len() - s.trim_start_matches(' ').len()
}
537
538/// Split a mapping line into key and remainder (comment-stripped).
539fn find_key(content: &str) -> Option<(String, String)> {
540    if content.starts_with('"') || content.starts_with('\'') {
541        let (key, end) = parse_quoted(content)?;
542        let after = content[end..].trim_start();
543        let rest = after.strip_prefix(':')?;
544        if !rest.is_empty() && !rest.starts_with(' ') {
545            return None;
546        }
547        return Some((key, strip_comment(rest.trim()).to_string()));
548    }
549    let bytes = content.as_bytes();
550    let mut depth = 0i32;
551    for i in 0..bytes.len() {
552        match bytes[i] {
553            b'[' | b'{' => depth += 1,
554            b']' | b'}' => depth -= 1,
555            b':' if depth == 0 && (i + 1 == bytes.len() || bytes[i + 1] == b' ') => {
556                let key = content[..i].trim();
557                if key.is_empty() || key.starts_with('#') {
558                    return None;
559                }
560                return Some((
561                    key.to_string(),
562                    strip_comment(content[i + 1..].trim()).to_string(),
563                ));
564            }
565            b'#' if i > 0 && bytes[i - 1] == b' ' => return None,
566            _ => {}
567        }
568    }
569    None
570}
571
/// Strip a ` #comment` suffix outside quotes.
///
/// A `#` starts a comment when it is the first character of `s` or when
/// it follows a space or tab outside single and double quotes. The
/// returned slice has trailing whitespace removed when a comment was cut;
/// otherwise `s` is returned unchanged.
///
/// Inside double quotes a backslash escapes the following byte, so a
/// string ending in an escaped backslash (`"C:\\"`) closes correctly.
/// Previously the closing quote was judged only by the byte before it, so
/// such a string never closed and its trailing comment was kept.
fn strip_comment(s: &str) -> &str {
    if s.starts_with('#') {
        return "";
    }
    let bytes = s.as_bytes();
    let mut in_single = false;
    let mut in_double = false;
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
        if in_double {
            match b {
                // Skip the escaped byte so `\"` and `\\` are consumed as pairs.
                b'\\' => i += 1,
                b'"' => in_double = false,
                _ => {}
            }
        } else if in_single {
            if b == b'\'' {
                in_single = false;
            }
        } else {
            match b {
                b'\'' => in_single = true,
                // Outside quotes, a backslash-preceded `"` does not open a string.
                b'"' if i == 0 || bytes[i - 1] != b'\\' => in_double = true,
                b'#' if i > 0 && matches!(bytes[i - 1], b' ' | b'\t') => {
                    return s[..i].trim_end();
                }
                _ => {}
            }
        }
        i += 1;
    }
    s
}
598
/// Reports whether `s` holds a complete flow collection: every bracket
/// opened outside quotes has been closed and no quoted string is left
/// open. Inside double quotes a backslash escapes the following byte.
fn flow_balanced(s: &str) -> bool {
    #[derive(Clone, Copy, PartialEq)]
    enum Quote {
        Outside,
        Single,
        Double,
    }

    let mut depth = 0i32;
    let mut state = Quote::Outside;
    let mut bytes = s.bytes();
    while let Some(b) = bytes.next() {
        state = match (state, b) {
            (Quote::Single, b'\'') | (Quote::Double, b'"') => Quote::Outside,
            (Quote::Double, b'\\') => {
                // Consume the escaped byte.
                bytes.next();
                Quote::Double
            }
            (Quote::Outside, b'\'') => Quote::Single,
            (Quote::Outside, b'"') => Quote::Double,
            (Quote::Outside, b'[' | b'{') => {
                depth += 1;
                Quote::Outside
            }
            (Quote::Outside, b']' | b'}') => {
                depth -= 1;
                Quote::Outside
            }
            (unchanged, _) => unchanged,
        };
    }
    depth <= 0 && state == Quote::Outside
}
630
/// Parse a quoted scalar at the start of `s`; returns (value, end byte offset).
///
/// The end offset points just past the closing quote. Returns `None` when
/// `s` does not start with a quote, the closing quote is missing, or a
/// double-quoted string contains an invalid escape.
///
/// * Single quotes: `''` is the only escape and stands for one `'`.
/// * Double quotes: `\n \t \r \0 \a \b \f \v \e \" \\ \/` plus `\xHH`,
///   `\uHHHH` and `\UHHHHHHHH`. Every `H` must be a hex digit; a code
///   point that is not a valid `char` (a surrogate, for instance) is
///   rejected rather than mis-handled.
fn parse_quoted(s: &str) -> Option<(String, usize)> {
    /// Decode exactly `len` hex digits starting at byte offset `at`.
    ///
    /// The digits are validated explicitly because `u32::from_str_radix`
    /// also accepts a leading `+`, which would let `\x+1` through.
    fn hex_escape(s: &str, at: usize, len: usize) -> Option<char> {
        let digits = s.get(at..at + len)?;
        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        char::from_u32(u32::from_str_radix(digits, 16).ok()?)
    }

    let mut chars = s.char_indices();
    let (_, quote) = chars.next()?;
    match quote {
        '\'' => {
            let mut out = String::new();
            let mut rest = chars.peekable();
            while let Some((at, c)) = rest.next() {
                if c != '\'' {
                    out.push(c);
                } else if rest.next_if(|&(_, next)| next == '\'').is_some() {
                    // Doubled quote: an escaped `'`.
                    out.push('\'');
                } else {
                    return Some((out, at + 1));
                }
            }
            None
        }
        '"' => {
            let mut out = String::new();
            let mut i = 1;
            while i < s.len() {
                let c = s[i..].chars().next()?;
                match c {
                    '"' => return Some((out, i + 1)),
                    '\\' => {
                        let esc = s[i + 1..].chars().next()?;
                        // `i` now points just past the escape letter.
                        i += 1 + esc.len_utf8();
                        let (decoded, hex_len) = match esc {
                            'n' => ('\n', 0),
                            't' => ('\t', 0),
                            'r' => ('\r', 0),
                            '0' => ('\0', 0),
                            'a' => ('\x07', 0),
                            'b' => ('\x08', 0),
                            'f' => ('\x0c', 0),
                            'v' => ('\x0b', 0),
                            'e' => ('\x1b', 0),
                            '"' | '\\' | '/' => (esc, 0),
                            'x' => (hex_escape(s, i, 2)?, 2),
                            'u' => (hex_escape(s, i, 4)?, 4),
                            'U' => (hex_escape(s, i, 8)?, 8),
                            _ => return None,
                        };
                        out.push(decoded);
                        i += hex_len;
                    }
                    _ => {
                        out.push(c);
                        i += c.len_utf8();
                    }
                }
            }
            None
        }
        _ => None,
    }
}
706
707/* ------------------------------------------------------------------ */
708/*  Flow-collection parser                                             */
709/* ------------------------------------------------------------------ */
710
/// Cursor-based parser for a single flow collection (`[...]` / `{...}`)
/// whose complete text has already been gathered into one string.
struct FlowParser<'a> {
    /// The bytes of `text`, used for scanning.
    s: &'a [u8],
    /// The same text as `s`, used for slicing out `&str` pieces.
    text: &'a str,
    /// Current byte offset into `s` / `text`.
    i: usize,
    /// 0-based line attached to every error this parser reports.
    raw_line: usize,
}
717
718impl<'a> FlowParser<'a> {
719    fn ws(&mut self) {
720        while self.i < self.s.len() && (self.s[self.i] == b' ' || self.s[self.i] == b'\t') {
721            self.i += 1;
722        }
723    }
724
725    fn fail(&self, msg: &str) -> YamlError {
726        YamlError::new(format!("{msg} in flow collection"), Some(self.raw_line))
727    }
728
729    fn parse_value(&mut self) -> Result<Value, YamlError> {
730        self.ws();
731        let Some(&c) = self.s.get(self.i) else {
732            return Err(self.fail("unexpected end"));
733        };
734        match c {
735            b'[' => self.parse_array(),
736            b'{' => self.parse_map(),
737            b'"' | b'\'' => {
738                let (value, end) = parse_quoted(&self.text[self.i..])
739                    .ok_or_else(|| self.fail("unterminated quoted scalar"))?;
740                self.i += end;
741                Ok(Value::String(value))
742            }
743            b'&' | b'*' => Err(self.fail("anchors/aliases are not supported")),
744            b'!' => Err(self.fail("tags are not supported")),
745            _ => {
746                let start = self.i;
747                while self.i < self.s.len() {
748                    let ch = self.s[self.i];
749                    if ch == b',' || ch == b']' || ch == b'}' {
750                        break;
751                    }
752                    if ch == b':' && (self.i + 1 == self.s.len() || self.s[self.i + 1] == b' ') {
753                        break;
754                    }
755                    self.i += 1;
756                }
757                resolve_scalar(self.text[start..self.i].trim())
758                    .map_err(|e| YamlError::new(e.message, Some(self.raw_line)))
759            }
760        }
761    }
762
763    fn parse_array(&mut self) -> Result<Value, YamlError> {
764        self.i += 1;
765        let mut out = Vec::new();
766        self.ws();
767        if self.s.get(self.i) == Some(&b']') {
768            self.i += 1;
769            return Ok(Value::Array(out));
770        }
771        loop {
772            out.push(self.parse_value()?);
773            self.ws();
774            match self.s.get(self.i) {
775                Some(b',') => {
776                    self.i += 1;
777                    self.ws();
778                    if self.s.get(self.i) == Some(&b']') {
779                        self.i += 1;
780                        return Ok(Value::Array(out));
781                    }
782                }
783                Some(b']') => {
784                    self.i += 1;
785                    return Ok(Value::Array(out));
786                }
787                _ => return Err(self.fail("expected , or ]")),
788            }
789        }
790    }
791
792    fn parse_map(&mut self) -> Result<Value, YamlError> {
793        self.i += 1;
794        let mut out = Map::new();
795        self.ws();
796        if self.s.get(self.i) == Some(&b'}') {
797            self.i += 1;
798            return Ok(Value::Object(out));
799        }
800        loop {
801            self.ws();
802            let key = match self.s.get(self.i) {
803                Some(b'"') | Some(b'\'') => {
804                    let (k, end) = parse_quoted(&self.text[self.i..])
805                        .ok_or_else(|| self.fail("unterminated quoted key"))?;
806                    self.i += end;
807                    k
808                }
809                _ => {
810                    let start = self.i;
811                    while self.i < self.s.len()
812                        && self.s[self.i] != b':'
813                        && self.s[self.i] != b','
814                        && self.s[self.i] != b'}'
815                    {
816                        self.i += 1;
817                    }
818                    self.text[start..self.i].trim().to_string()
819                }
820            };
821            self.ws();
822            let mut value = Value::Null;
823            if self.s.get(self.i) == Some(&b':') {
824                self.i += 1;
825                value = self.parse_value()?;
826            }
827            if out.contains_key(&key) {
828                return Err(self.fail(&format!("duplicate key {key:?}")));
829            }
830            out.insert(key, value);
831            self.ws();
832            match self.s.get(self.i) {
833                Some(b',') => {
834                    self.i += 1;
835                    self.ws();
836                    if self.s.get(self.i) == Some(&b'}') {
837                        self.i += 1;
838                        return Ok(Value::Object(out));
839                    }
840                }
841                Some(b'}') => {
842                    self.i += 1;
843                    return Ok(Value::Object(out));
844                }
845                _ => return Err(self.fail("expected , or }")),
846            }
847        }
848    }
849
850    fn expect_end(&mut self) -> Result<(), YamlError> {
851        self.ws();
852        if self.i < self.s.len() {
853            return Err(self.fail("trailing content"));
854        }
855        Ok(())
856    }
857}
858
859/* ------------------------------------------------------------------ */
860/*  Public API                                                         */
861/* ------------------------------------------------------------------ */
862
863/// Parse TF-YAML into a JSON value tree.
864pub fn parse(input: &str) -> Result<Value, YamlError> {
865    Parser::new(input)?.parse()
866}
867
868/// Parse TF-YAML directly into a typed struct.
869pub fn from_str<T: serde::de::DeserializeOwned>(input: &str) -> Result<T, YamlError> {
870    let value = parse(input)?;
871    serde_json::from_value(value).map_err(|e| YamlError::new(e.to_string(), None))
872}
873
874/// Serialize a value as block-style TF-YAML.
875pub fn to_string<T: serde::Serialize>(value: &T) -> Result<String, YamlError> {
876    let json = serde_json::to_value(value).map_err(|e| YamlError::new(e.to_string(), None))?;
877    Ok(emit(&json))
878}
879
880/* ------------------------------------------------------------------ */
881/*  Emitter                                                            */
882/* ------------------------------------------------------------------ */
883
/// Whether `s` consists solely of characters the emitter accepts in an
/// unquoted scalar.
///
/// The first character must be an ASCII letter, an ASCII digit or `_`.
/// Every character must be an ASCII letter or digit, or one of
/// `_`, `-`, `.`, `/`, `@` or a space. The empty string is never safe.
fn plain_safe(s: &str) -> bool {
    fn word(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '_'
    }
    fn body(c: char) -> bool {
        word(c) || matches!(c, '-' | '.' | '/' | '@' | ' ')
    }

    let leads_with_word = matches!(s.chars().next(), Some(c) if word(c));
    leads_with_word && s.chars().all(body)
}
893
894fn needs_quoting(s: &str) -> bool {
895    if s.is_empty() || !plain_safe(s) || s != s.trim() {
896        return true;
897    }
898    match resolve_scalar(s) {
899        Ok(Value::String(_)) => {}
900        _ => return true,
901    }
902    // Syntactically number-like strings must be quoted regardless of our
903    // own resolution — see the TS mirror.
904    let body = s
905        .strip_prefix('-')
906        .or_else(|| s.strip_prefix('+'))
907        .unwrap_or(s);
908    if !body.is_empty()
909        && body.bytes().all(|b| b.is_ascii_digit() || b == b'_')
910        && body.bytes().next().unwrap().is_ascii_digit()
911    {
912        return true;
913    }
914    if is_float_syntax(s) {
915        return true;
916    }
917    false
918}
919
920fn quote(s: &str) -> String {
921    serde_json::to_string(s).expect("string serializes")
922}
923
924fn format_scalar(v: &Value) -> String {
925    match v {
926        Value::Null => "null".to_string(),
927        Value::Bool(true) => "true".to_string(),
928        Value::Bool(false) => "false".to_string(),
929        Value::Number(n) => n.to_string(),
930        Value::String(s) => {
931            if needs_quoting(s) {
932                quote(s)
933            } else {
934                s.clone()
935            }
936        }
937        _ => unreachable!("caller checked scalar"),
938    }
939}
940
941fn is_scalar(v: &Value) -> bool {
942    !matches!(v, Value::Array(_) | Value::Object(_))
943}
944
945fn format_key(k: &str) -> String {
946    if needs_quoting(k) || k.contains(':') || k.contains('#') {
947        quote(k)
948    } else {
949        k.to_string()
950    }
951}
952
953/// Multi-line strings become literal block scalars when safe.
954fn format_multiline(v: &Value, indent: usize) -> String {
955    if let Value::String(s) = v {
956        let ok = s.contains('\n')
957            && !s.starts_with(char::is_whitespace)
958            && !s.ends_with(char::is_whitespace)
959            && !s.contains("\n\n\n");
960        if ok {
961            let pad = "  ".repeat(indent);
962            let body: Vec<String> = s
963                .split('\n')
964                .map(|l| {
965                    if l.is_empty() {
966                        String::new()
967                    } else {
968                        format!("{pad}{l}")
969                    }
970                })
971                .collect();
972            return format!("|-\n{}", body.join("\n"));
973        }
974    }
975    format_scalar(v)
976}
977
978fn emit_entry(prefix: &str, key: &str, val: &Value, child_indent: usize, out: &mut Vec<String>) {
979    if is_scalar(val) {
980        out.push(format!(
981            "{prefix}{}: {}",
982            format_key(key),
983            format_multiline(val, child_indent)
984        ));
985    } else {
986        out.push(format!("{prefix}{}:", format_key(key)));
987        emit_node(val, child_indent, out);
988    }
989}
990
991fn emit_node(v: &Value, indent: usize, out: &mut Vec<String>) {
992    let pad = "  ".repeat(indent);
993    match v {
994        Value::Array(items) => {
995            if items.is_empty() {
996                let last = out.last_mut().expect("array attaches to a line");
997                last.push_str(" []");
998                return;
999            }
1000            for item in items {
1001                if is_scalar(item) {
1002                    out.push(format!("{pad}- {}", format_scalar(item)));
1003                } else if let Value::Array(_) = item {
1004                    out.push(format!("{pad}-"));
1005                    emit_node(item, indent + 1, out);
1006                } else if let Value::Object(map) = item {
1007                    if map.is_empty() {
1008                        out.push(format!("{pad}- {{}}"));
1009                        continue;
1010                    }
1011                    let mut first = true;
1012                    for (k, val) in map {
1013                        let prefix = if first {
1014                            format!("{pad}- ")
1015                        } else {
1016                            format!("{pad}  ")
1017                        };
1018                        first = false;
1019                        emit_entry(&prefix, k, val, indent + 2, out);
1020                    }
1021                }
1022            }
1023        }
1024        Value::Object(map) => {
1025            if map.is_empty() {
1026                let last = out.last_mut().expect("map attaches to a line");
1027                last.push_str(" {}");
1028                return;
1029            }
1030            for (k, val) in map {
1031                emit_entry(&pad, k, val, indent + 1, out);
1032            }
1033        }
1034        _ => unreachable!("caller checked collection"),
1035    }
1036}
1037
1038fn emit(v: &Value) -> String {
1039    if is_scalar(v) {
1040        return format!("{}\n", format_scalar(v));
1041    }
1042    // Top-level empty collections have no key line to attach to.
1043    match v {
1044        Value::Array(items) if items.is_empty() => return "[]\n".to_string(),
1045        Value::Object(map) if map.is_empty() => return "{}\n".to_string(),
1046        _ => {}
1047    }
1048    let mut out = Vec::new();
1049    emit_node(v, 0, &mut out);
1050    format!("{}\n", out.join("\n"))
1051}
1052
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Asserts that `src` parses successfully to exactly `expected`.
    fn assert_parses(src: &str, expected: serde_json::Value) {
        assert_eq!(parse(src).unwrap(), expected, "input: {src:?}");
    }

    #[test]
    fn scalars_resolve_like_yaml12_core() {
        let cases = vec![
            ("x: null", json!({"x": null})),
            ("x: ~", json!({"x": null})),
            ("x:", json!({"x": null})),
            ("x: true", json!({"x": true})),
            ("x: False", json!({"x": false})),
            ("x: 42", json!({"x": 42})),
            ("x: -7", json!({"x": -7})),
            ("x: 0x1f", json!({"x": 31})),
            ("x: 3.5", json!({"x": 3.5})),
            ("x: 1e3", json!({"x": 1000.0})),
            // YAML 1.2 core schema, not 1.1: these stay plain strings.
            ("x: yes", json!({"x": "yes"})),
            ("x: on", json!({"x": "on"})),
            // Overflow-sized digit runs stay strings (JS parity).
            (
                "x: 070000004041424344454647",
                json!({"x": "070000004041424344454647"}),
            ),
        ];
        for (src, expected) in cases {
            assert_parses(src, expected);
        }
    }

    #[test]
    fn block_structures() {
        let src = "top:\n  list:\n    - a\n    - name: n1\n      value: 1\n    - - nested\n  map:\n    k: v\n";
        assert_parses(
            src,
            json!({"top": {"list": ["a", {"name": "n1", "value": 1}, ["nested"]], "map": {"k": "v"}}}),
        );
    }

    #[test]
    fn flow_structures() {
        assert_parses(
            "x: [1, two, {k: v}, [3]]",
            json!({"x": [1, "two", {"k": "v"}, [3]]}),
        );
        assert_parses("x: {a: 1, b: [2]}", json!({"x": {"a": 1, "b": [2]}}));
        // A flow sequence may span several lines.
        assert_parses("x: [1,\n   2,\n   3]", json!({"x": [1, 2, 3]}));
    }

    #[test]
    fn quoted_scalars() {
        let cases = vec![
            ("x: 'it''s'", json!({"x": "it's"})),
            ("x: \"a\\nb\"", json!({"x": "a\nb"})),
            ("x: \"42\"", json!({"x": "42"})),
            ("\"a: b\": 1", json!({"a: b": 1})),
        ];
        for (src, expected) in cases {
            assert_parses(src, expected);
        }
    }

    #[test]
    fn block_scalars() {
        let cases = vec![
            (
                "x: |\n  line1\n  line2\ny: 1",
                json!({"x": "line1\nline2\n", "y": 1}),
            ),
            ("x: |-\n  line1\n  line2", json!({"x": "line1\nline2"})),
            (
                "x: >-\n  fold\n  ed\n\n  para",
                json!({"x": "fold ed\npara"}),
            ),
            // Body lines that look like structure stay text.
            (
                "x: |\n  key: value\n  - item\ny: 2",
                json!({"x": "key: value\n- item\n", "y": 2}),
            ),
        ];
        for (src, expected) in cases {
            assert_parses(src, expected);
        }
    }

    #[test]
    fn comments_and_blanks() {
        let src = "# header\nx: 1 # trailing\n\ny: \"# not a comment\"\nz: a#b\n";
        assert_parses(src, json!({"x": 1, "y": "# not a comment", "z": "a#b"}));
    }

    #[test]
    fn subset_violations_rejected() {
        let rejected = vec![
            "x: &a 1",
            "x: *a",
            "x: !!str 1",
            "a: 1\n---\nb: 2",
            "x: 1\nx: 2",
        ];
        for src in rejected {
            assert!(parse(src).is_err(), "accepted: {src:?}");
        }
    }

    #[test]
    fn emitter_round_trips() {
        let original = json!({
            "name": "test",
            "count": 42,
            "pi": 3.5,
            "flag": true,
            "nothing": null,
            "digits": "0123456789012345678",
            "multiline": "first\nsecond",
            "list": [1, "two", {"k": "v", "nested": {"deep": [1, 2]}}],
            "looks_like_bool": "true",
            "empty_list": [],
            "empty_map": {},
            "weird key: yes": "value",
        });
        let text = to_string(&original).unwrap();
        let reparsed = parse(&text).unwrap();
        assert_eq!(reparsed, original, "emitted:\n{text}");
    }

    #[test]
    fn typed_from_str() {
        #[derive(serde::Deserialize, PartialEq, Debug)]
        struct T {
            name: String,
            values: Vec<i64>,
        }
        let expected = T {
            name: "x".into(),
            values: vec![1, 2],
        };
        let actual: T = from_str("name: x\nvalues: [1, 2]\n").unwrap();
        assert_eq!(actual, expected);
    }
}