// hocon-parser 1.6.1
//
// Full Lightbend HOCON specification-compliant parser for Rust.
// Documentation
use hocon::{tokenize, HoconError, TokenKind};

/// Tokenizes `input`, picks the first `Substitution` token, and returns every
/// path segment of its payload as a `(text, line, col)` tuple.
///
/// Panics if tokenizing fails, if no substitution token is found, or if the
/// token has no substitution payload.
fn subst_segments(input: &str) -> Vec<(String, usize, usize)> {
    let stream = tokenize(input).unwrap();
    let subst_token = stream
        .iter()
        .find(|tok| tok.kind == TokenKind::Substitution)
        .expect("subst token");
    let body = subst_token.subst.as_ref().expect("subst payload");

    // Flatten each segment into an owned tuple so callers can index freely.
    let mut flattened = Vec::with_capacity(body.segments.len());
    for seg in &body.segments {
        flattened.push((seg.text.clone(), seg.line, seg.col));
    }
    flattened
}

/// An unquoted dotted path yields one segment per path element, each reported
/// at the line/column of its first character.
#[test]
fn segment_position_unquoted_path() {
    let segs = subst_segments("${foo.bar}");
    // Check the count first so a wrong split fails with a readable message
    // rather than an index-out-of-bounds panic on `segs[1]`.
    assert_eq!(segs.len(), 2, "expected segments foo and bar, got {:?}", segs);
    assert_eq!(segs[0].0, "foo");
    assert_eq!(segs[0].1, 1);
    // '$' is at col 1, '{' col 2, 'f' col 3
    assert_eq!(segs[0].2, 3);
    assert_eq!(segs[1].0, "bar");
    // Single-line input: the second segment is on line 1 as well.
    assert_eq!(segs[1].1, 1);
    // After '.' at col 6, 'b' is at col 7
    assert_eq!(segs[1].2, 7);
}

/// Quoted segments separated by '.' are reported at the column of their
/// opening quote.
#[test]
fn segment_position_quoted_dot_separator() {
    // ${"a"."b"}
    // '$' col 1, '{' col 2, '"' col 3, 'a' col 4, '"' col 5, '.' col 6, '"' col 7, 'b' col 8, '"' col 9, '}' col 10
    let segs = subst_segments(r#"${"a"."b"}"#);
    // Same length check as the ws_concat / empty_quoted_key tests: the dot
    // must split the path into exactly two segments.
    assert_eq!(segs.len(), 2, "expected segments a and b, got {:?}", segs);
    assert_eq!(segs[0].0, "a");
    assert_eq!(segs[0].2, 3); // opening '"' of first quoted run
    assert_eq!(segs[1].0, "b");
    assert_eq!(segs[1].2, 7); // opening '"' of second quoted run
}

#[test]
fn segment_position_multiline() {
    // The substitution sits on the second line of the input:
    //   line 1: "x=1"
    //   line 2: "y=${foo}" -> 'y' col 1, '=' col 2, '$' col 3, '{' col 4, 'f' col 5
    let segments = subst_segments("x=1\ny=${foo}");
    let (text, line, col) = &segments[0];
    assert_eq!(text.as_str(), "foo");
    assert_eq!(*line, 2);
    assert_eq!(*col, 5);
}

#[test]
fn segment_position_ws_concat() {
    // Input: ${"a" "b"}
    // Columns: '$' 1, '{' 2, '"' 3, 'a' 4, '"' 5, ' ' 6, '"' 7, 'b' 8, '"' 9, '}' 10.
    // The whitespace between the two quoted runs is preserved, so the path is
    // one segment with text "a b", positioned at the first opening quote.
    let segments = subst_segments(r#"${"a" "b"}"#);
    assert_eq!(segments.len(), 1);
    let (text, _line, col) = &segments[0];
    assert_eq!(text.as_str(), "a b");
    assert_eq!(*col, 3);
}

#[test]
fn segment_position_empty_quoted_key() {
    // Input: ${""}
    // Columns: '$' 1, '{' 2, opening '"' 3, closing '"' 4, '}' 5.
    // An empty quoted key still counts as one segment, located at the opening quote.
    let segments = subst_segments(r#"${""}"#);
    assert_eq!(segments.len(), 1);
    let (text, _line, col) = &segments[0];
    assert_eq!(text.as_str(), "");
    assert_eq!(*col, 3);
}

/// Returns the `(line, col)` pair carried by a `HoconError::Parse` error.
///
/// # Panics
///
/// Panics when `err` is any variant other than `HoconError::Parse`.
fn parse_err_pos(err: &HoconError) -> (usize, usize) {
    if let HoconError::Parse(parse) = err {
        return (parse.line, parse.col);
    }
    panic!("expected HoconError::Parse, got {:?}", err)
}

#[test]
fn error_position_invalid_escape_inside_body() {
    // Input: x=${"a\xb"}
    // Columns: 'x' 1, '=' 2, '$' 3, '{' 4, '"' 5, 'a' 6, '\' 7, 'x' 8, 'b' 9, '"' 10, '}' 11.
    // The invalid escape starts at the '\' in col 7. The test only requires the
    // reported column to fall within the `${...}` span, i.e. cols 3..=11.
    let source = r#"x=${"a\xb"}"#;
    let err = hocon::parse(source).unwrap_err();

    let rendered = err.to_string();
    assert!(
        rendered.contains("invalid escape sequence"),
        "msg = {}",
        err
    );

    let (line, col) = parse_err_pos(&err);
    assert_eq!(line, 1, "line should be 1, got {} (err = {})", line, err);

    let inside_body = (3..=11).contains(&col);
    assert!(
        inside_body,
        "col {} not in subst body [3, 11] (err = {})",
        col,
        err
    );
}

#[test]
fn error_position_empty_path() {
    // `${}` has nothing between the braces; parsing must fail with an
    // "empty substitution path" error reported on line 1. The column is not checked.
    let failure = hocon::parse("x=${}").unwrap_err();

    let rendered = failure.to_string();
    assert!(
        rendered.contains("empty substitution path"),
        "err = {}",
        failure
    );

    let (line, _) = parse_err_pos(&failure);
    assert_eq!(
        line, 1,
        "line should be 1, got {} (err = {})",
        line, failure
    );
}

#[test]
fn surrogate_codepoint_rejected() {
    // \uD800 is a high surrogate, which is not a Unicode scalar value.
    // Rejecting it with "invalid unicode escape" is deliberate. Lightbend
    // accepts it because java.lang.String is a sequence of 16-bit code units,
    // whereas this parser follows Rust `char` / Unicode scalar-value rules.
    // See spec §"QUOTED reading rules" and the surrogate note.
    let outcome = hocon::parse(r#"x="a\uD800b""#);
    let rendered = outcome.unwrap_err().to_string();
    assert!(
        rendered.contains("invalid unicode escape"),
        "msg = {}",
        rendered
    );
}

#[test]
fn surrogate_codepoint_rejected_inside_subst() {
    // A lone surrogate escape is rejected inside a `${...}` body too, for the
    // same deliberate Rust-side reason as in a plain quoted string
    // (see surrogate_codepoint_rejected for the Lightbend-divergence rationale).
    let outcome = hocon::parse(r#"x=${"a\uD800b"}"#);
    let rendered = outcome.unwrap_err().to_string();
    assert!(
        rendered.contains("invalid unicode escape"),
        "msg = {}",
        rendered
    );
}

// ── S13c: `[]` suffix on substitutions ───────────────────────────────────────

/// Lexing `${X[]}` produces a Substitution token whose payload has
/// `list_suffix` set, exactly one segment `X`, and `optional` unset.
#[test]
fn lex_subst_list_suffix_basic() {
    let stream = hocon::tokenize("${X[]}").unwrap();
    let subst_tok = stream
        .iter()
        .find(|tok| tok.kind == hocon::TokenKind::Substitution)
        .expect("Substitution token");
    let payload = subst_tok.subst.as_ref().expect("SubstPayload");

    assert!(
        payload.list_suffix,
        "list_suffix must be true for ${{X[]}}"
    );
    assert_eq!(payload.segments.len(), 1, "exactly one segment");
    assert_eq!(payload.segments[0].text, "X");
    assert!(!payload.optional);
}

/// Unit 2: `${?X[]}` — the optional form combined with the `[]` suffix sets
/// both `optional` and `list_suffix` and keeps the single path segment `X`.
#[test]
fn lex_subst_list_suffix_optional() {
    let tokens = hocon::tokenize("${?X[]}").unwrap();
    let t = tokens
        .iter()
        .find(|t| t.kind == hocon::TokenKind::Substitution)
        .expect("Substitution token");
    let p = t.subst.as_ref().expect("SubstPayload");
    assert!(p.list_suffix, "list_suffix must be true for ${{?X[]}}");
    assert!(p.optional);
    // Pin the segment count (as lex_subst_list_suffix_basic does) so that a
    // lexer leaking '?' or '[]' into an extra segment cannot pass unnoticed.
    assert_eq!(p.segments.len(), 1, "exactly one segment");
    assert_eq!(p.segments[0].text, "X");
}

/// Unit 2: a dotted path with the suffix, `${FOO.BAR[]}`, keeps both segments
/// (`FOO`, `BAR`) and sets `list_suffix`.
#[test]
fn lex_subst_list_suffix_multipath() {
    let stream = hocon::tokenize("${FOO.BAR[]}").unwrap();
    let first_subst = stream
        .iter()
        .find(|tok| tok.kind == hocon::TokenKind::Substitution);
    let payload = first_subst
        .and_then(|tok| tok.subst.as_ref())
        .expect("SubstPayload");

    assert!(payload.list_suffix);
    assert_eq!(payload.segments.len(), 2);
    assert_eq!(payload.segments[0].text, "FOO");
    assert_eq!(payload.segments[1].text, "BAR");
}

/// Unit 2: a substitution without the `[]` suffix, `${X}`, must leave
/// `list_suffix` unset.
#[test]
fn lex_subst_no_list_suffix_for_plain() {
    let stream = hocon::tokenize("${X}").unwrap();
    let first_subst = stream
        .iter()
        .find(|tok| tok.kind == hocon::TokenKind::Substitution);
    let payload = first_subst
        .and_then(|tok| tok.subst.as_ref())
        .expect("SubstPayload");

    assert!(
        !payload.list_suffix,
        "plain ${{X}} must NOT set list_suffix"
    );
}

// ── Unit 4: E7 — whitespace before '[]' is allowed ───────────────────────────

/// E7: a single ASCII space between the path and `[]` is accepted; the suffix
/// is still recognised and the segment text stays `X`.
#[test]
fn lex_subst_list_suffix_e7_space() {
    // One space separates `X` from `[`.
    let source = "${X []}";
    let stream = hocon::tokenize(source).unwrap();
    let first_subst = stream
        .iter()
        .find(|tok| tok.kind == hocon::TokenKind::Substitution);
    let payload = first_subst
        .and_then(|tok| tok.subst.as_ref())
        .expect("SubstPayload");

    assert!(
        payload.list_suffix,
        "space before [] must still set list_suffix"
    );
    assert_eq!(payload.segments.len(), 1);
    assert_eq!(payload.segments[0].text, "X");
}

/// ASCII tab between path and `[]` is allowed (E7): the suffix is still
/// recognised and the path remains the single segment `X`.
#[test]
fn lex_subst_list_suffix_e7_tab() {
    // "${X\t[]}" — tab before `[`
    let input = "${X\t[]}";
    let tokens = hocon::tokenize(input).unwrap();
    let p = tokens
        .iter()
        .find(|t| t.kind == hocon::TokenKind::Substitution)
        .and_then(|t| t.subst.as_ref())
        .expect("SubstPayload");
    assert!(p.list_suffix, "tab before [] must still set list_suffix");
    // Same count check as the space variant: the tab must not create an
    // extra segment or be folded into the path as a second one.
    assert_eq!(p.segments.len(), 1);
    assert_eq!(p.segments[0].text, "X");
}

// ── Unit 3: error shapes for malformed '[]' suffix ────────────────────────────

/// `${[]}` has no path in front of the `[]` suffix and must fail to lex.
/// Either the "empty segment before" or the "empty substitution path" wording
/// is accepted.
#[test]
fn lex_subst_list_suffix_empty_path_errors() {
    let rendered = hocon::tokenize("${[]}").unwrap_err().to_string();
    let accepted = ["empty segment before", "empty substitution path"];
    let mentions_empty = accepted.iter().any(|needle| rendered.contains(*needle));
    assert!(
        mentions_empty,
        "expected empty-segment/path error, got: {}",
        rendered
    );
}

/// `${X[}` opens the suffix but never closes it; lexing must fail with a
/// message containing "expected ']'".
#[test]
fn lex_subst_list_suffix_missing_close_bracket_errors() {
    let rendered = hocon::tokenize("${X[}").unwrap_err().to_string();
    let names_missing_bracket = rendered.contains("expected ']'");
    assert!(
        names_missing_bracket,
        "expected ']' missing error, got: {}",
        rendered
    );
}

/// `${X[ ]}` puts a space between `[` and `]`. The suffix is strict (spec
/// Decision §1), so this is a lex error with a message containing "expected ']'".
#[test]
fn lex_subst_list_suffix_whitespace_inside_brackets_errors() {
    let rendered = hocon::tokenize("${X[ ]}").unwrap_err().to_string();
    let names_missing_bracket = rendered.contains("expected ']'");
    assert!(
        names_missing_bracket,
        "expected ']' missing error for whitespace inside [], got: {}",
        rendered
    );
}

/// A doubled suffix, `${X[][]}`, must be rejected.
///
/// Per the original note on this test, once the first `[]` is consumed the
/// `[` arm in `parse_subst_body` requires the closing `}` and reports
/// "expected '}' after '[]' in substitution" when it sees another `[`, so the
/// failure is raised inside `parse_subst_body` rather than afterwards. This
/// test only checks that tokenizing returns an error, not the message text.
#[test]
fn lex_subst_list_suffix_double_suffix_errors() {
    let outcome = hocon::tokenize("x = ${X[][]}");
    let rejected = outcome.is_err();
    assert!(rejected, "${{X[][]}} must be a lex/parse error");
}