stream-rs 0.1.0

Zero-dependency, spec-compliant streaming toolkit for LLM responses (SSE, incremental JSON, OpenAI/Anthropic delta accumulators).
Documentation
//! Tests for the incremental JSON value splitter.

use stream_rs::incremental_json::{FinishError, JsonSplitter};

/// Feeds `input` to a fresh `JsonSplitter` in a single `feed` call and returns
/// the values collected by that call.
///
/// `finish` is not called here, so the result only contains what `feed`
/// itself pushed into the output vector.
fn split_all(input: &[u8]) -> Vec<String> {
    let mut values = Vec::new();
    JsonSplitter::new().feed(input, &mut values);
    values
}

#[test]
fn single_object() {
    // One complete object in, that same object out as the only value.
    let emitted = split_all(br#"{"a":1}"#);
    assert_eq!(emitted, vec![r#"{"a":1}"#]);
}

#[test]
fn concatenated_objects_no_separator() {
    // Two objects placed back to back, with nothing between them, are
    // expected to come out as two separate values.
    let expected = vec![r#"{"a":1}"#, r#"{"b":2}"#];
    assert_eq!(split_all(br#"{"a":1}{"b":2}"#), expected);
}

#[test]
fn ndjson_with_newlines() {
    // Newline-delimited input: each line holds one object and the newlines
    // themselves are not part of any emitted value.
    let stream = b"{\"a\":1}\n{\"b\":2}\n";
    let expected = vec![r#"{"a":1}"#, r#"{"b":2}"#];
    assert_eq!(split_all(stream), expected);
}

#[test]
fn comma_separated_values() {
    // A comma between two top-level arrays is expected to act as a separator
    // and not show up in either emitted value.
    let emitted = split_all("[1],[2]".as_bytes());
    assert_eq!(emitted, vec!["[1]", "[2]"]);
}

#[test]
fn braces_inside_strings_do_not_count() {
    // The string value contains `}{`; the whole document must still be
    // emitted as a single value.
    let doc = r#"{"k":"}{"}"#;
    assert_eq!(split_all(doc.as_bytes()), vec![doc]);
}

#[test]
fn escaped_quote_inside_string() {
    // The string value contains a backslash-escaped quote; the document must
    // still be emitted whole, as one value.
    let doc = r#"{"k":"a\"b"}"#;
    assert_eq!(split_all(doc.as_bytes()), vec![doc]);
}

#[test]
fn nested_structures() {
    // Objects and arrays nested inside one another form one top-level value,
    // which must be emitted unchanged.
    let doc = r#"{"a":{"b":[1,{"c":2}]}}"#;
    assert_eq!(split_all(doc.as_bytes()), vec![doc]);
}

#[test]
fn split_across_chunks() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();

    // First chunk: one whole object followed by the opening of a second one.
    let head = br#"{"a":1}{"b":"#;
    splitter.feed(head, &mut emitted);
    assert_eq!(emitted, vec![r#"{"a":1}"#]);
    assert!(splitter.has_partial());

    // Second chunk supplies the rest of the second object; after it, only
    // that object is in the (freshly cleared) output and nothing is pending.
    emitted.clear();
    let tail = br"[1,2]}";
    splitter.feed(tail, &mut emitted);
    assert_eq!(emitted, vec![r#"{"b":[1,2]}"#]);
    assert!(!splitter.has_partial());
}

#[test]
fn bare_scalar_values_with_separators() {
    // Both inputs end with a separator (a space, then a newline). That
    // trailing separator is what lets the last bare scalar be emitted by
    // `feed` alone.
    let numbers = split_all("1 2 3 ".as_bytes());
    assert_eq!(numbers, vec!["1", "2", "3"]);

    let keywords = split_all("true false null\n".as_bytes());
    assert_eq!(keywords, vec!["true", "false", "null"]);
}

#[test]
fn trailing_bare_scalar_without_separator_stays_partial() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();
    splitter.feed("1 2 3".as_bytes(), &mut emitted);

    // Nothing follows the final `3`, so it is not emitted yet and the
    // splitter reports that it is still holding partial input.
    assert_eq!(emitted, vec!["1", "2"]);
    assert!(splitter.has_partial());
}

#[test]
fn finish_flushes_trailing_bare_scalar() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();

    // After `feed` alone, the unterminated final scalar is still held back.
    splitter.feed("1 2 3".as_bytes(), &mut emitted);
    assert_eq!(emitted, vec!["1", "2"]);

    // `finish` must succeed and append the held-back scalar, leaving
    // nothing pending in the splitter.
    let flushed = splitter.finish(&mut emitted);
    flushed.expect("trailing scalar is a complete value");
    assert_eq!(emitted, vec!["1", "2", "3"]);
    assert!(!splitter.has_partial());
}

#[test]
fn finish_on_clean_boundary_is_ok_and_emits_nothing() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();
    splitter.feed(br#"{"a":1}"#, &mut emitted);
    assert_eq!(emitted, vec![r#"{"a":1}"#]);

    // The input ended exactly at the end of a value: `finish` must report
    // success and must not add anything to the output.
    let count_before_finish = emitted.len();
    let outcome = splitter.finish(&mut emitted);
    assert!(outcome.is_ok());
    assert_eq!(
        emitted.len(),
        count_before_finish,
        "nothing extra emitted on a clean boundary"
    );
}

#[test]
fn finish_reports_truncated_object() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();

    // One complete object, then the start of a second that never closes.
    splitter.feed(br#"{"a":1}{"b":"#, &mut emitted);
    assert_eq!(emitted, vec![r#"{"a":1}"#]);

    // `finish` must fail, and specifically with the truncation variant.
    let failure = splitter
        .finish(&mut emitted)
        .expect_err("second object is truncated");
    let truncation = match failure {
        FinishError::Truncated(details) => details,
        err => panic!("expected a truncation error, got {err:?}"),
    };
    assert!(truncation.buffered > 0);

    // The incomplete object never reaches the output ...
    assert_eq!(emitted, vec![r#"{"a":1}"#]);
    // ... and the splitter no longer reports pending input afterwards.
    assert!(!splitter.has_partial());
}

#[test]
fn finish_reports_truncated_string() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();

    // The input stops in the middle of a string value, so `feed` has
    // nothing complete to emit.
    splitter.feed(br#"{"a":"unterminated"#, &mut emitted);
    assert!(emitted.is_empty());

    // `finish` must fail with the truncation variant and a non-zero
    // `buffered` count.
    let failure = splitter
        .finish(&mut emitted)
        .expect_err("string was never closed");
    let truncation = match failure {
        FinishError::Truncated(details) => details,
        err => panic!("expected a truncation error, got {err:?}"),
    };
    assert!(truncation.buffered > 0);
}

#[test]
fn splitter_is_reusable_after_finish() {
    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();

    // Round one: a bare scalar that only `finish` can flush.
    splitter.feed("42".as_bytes(), &mut emitted);
    splitter.finish(&mut emitted).unwrap();
    assert_eq!(emitted, vec!["42"]);

    // Round two on the same splitter: an unrelated object goes through as
    // if the splitter were new.
    emitted.clear();
    let second_stream = br#"{"x":1}"#;
    splitter.feed(second_stream, &mut emitted);
    splitter.finish(&mut emitted).unwrap();
    assert_eq!(emitted, vec![r#"{"x":1}"#]);
}

#[test]
fn byte_at_a_time_matches_bulk() {
    // Three concatenated objects, fed once in bulk and once a byte at a time.
    let input = br#"{"a":1}{"b":[2,3]}{"c":"x"}"#;
    let bulk = split_all(input);

    let mut s = JsonSplitter::new();
    let mut drip = Vec::new();
    // `chunks(1)` hands the splitter one single-byte slice per call.
    for chunk in input.chunks(1) {
        s.feed(chunk, &mut drip);
    }
    assert_eq!(bulk, drip);
    // Equality alone would also hold if both paths emitted nothing, so pin
    // the number of values as well.
    assert_eq!(bulk.len(), 3, "all three objects must be emitted");
}

#[test]
fn multibyte_utf8_value_split_across_chunks() {
    // The string value holds a two-byte and a four-byte UTF-8 codepoint.
    // Feeding single bytes means several `feed` calls see only a fragment of
    // a codepoint; the emitted value must still equal the original text
    // exactly, with no replacement characters.
    let value = "{\"city\":\"caf\u{00E9} \u{1F600}\"}";

    let mut splitter = JsonSplitter::new();
    let mut emitted = Vec::new();
    // One-byte chunks cut through the middle of the multibyte codepoints.
    for chunk in value.as_bytes().chunks(1) {
        splitter.feed(chunk, &mut emitted);
    }
    splitter.finish(&mut emitted).unwrap();

    assert_eq!(emitted, vec![String::from(value)]);
}

#[test]
fn multibyte_byte_at_a_time_matches_bulk() {
    // Two objects whose string values consist of multibyte codepoints.
    let input = "{\"t\":\"\u{65E5}\u{672C}\u{8A9E}\"}{\"e\":\"\u{1F600}\u{1F389}\"}".as_bytes();
    let bulk = split_all(input);

    // Same bytes again, but handed over one at a time.
    let mut splitter = JsonSplitter::new();
    let mut drip = Vec::new();
    for chunk in input.chunks(1) {
        splitter.feed(chunk, &mut drip);
    }

    // Both feeding strategies must agree, and both objects must be present.
    assert_eq!(bulk, drip);
    assert_eq!(bulk.len(), 2);
}

#[test]
fn non_strict_emits_stray_close_bracket_as_value() {
    // Pins the documented legacy behaviour of the default (non-strict)
    // splitter: a stray leading `}` is not rejected but comes out as its own
    // one-byte value, ahead of the well-formed object.
    let emitted = split_all(br#"}{"a":1}"#);
    assert_eq!(emitted, vec!["}", r#"{"a":1}"#]);
}

#[test]
fn strict_rejects_stray_close_bracket() {
    let mut splitter = JsonSplitter::strict();
    let mut emitted = Vec::new();
    splitter.feed(br#"}{"a":1}"#, &mut emitted);

    // In strict mode the stray `}` does not appear in the output, the
    // following object is still emitted, and the offending byte is recorded.
    assert_eq!(emitted, vec![r#"{"a":1}"#]);
    assert!(splitter.error().is_some());
    assert_eq!(splitter.error().unwrap().byte, b'}');

    // `finish` then reports the same violation through its error value.
    let failure = splitter
        .finish(&mut emitted)
        .expect_err("framing violation surfaces on finish");
    let violation = match failure {
        FinishError::Malformed(details) => details,
        err => panic!("expected a malformed-framing error, got {err:?}"),
    };
    assert_eq!(violation.byte, b'}');
}

#[test]
fn strict_reports_value_index_of_violation() {
    let mut splitter = JsonSplitter::strict();
    let mut emitted = Vec::new();

    // A stray `]` sits between two well-formed objects, i.e. after exactly
    // one value has already been emitted.
    splitter.feed(br#"{"a":1}]{"b":2}"#, &mut emitted);
    assert_eq!(emitted, vec![r#"{"a":1}"#, r#"{"b":2}"#]);

    // The recorded violation names the byte and how many values preceded it.
    let violation = splitter.error().expect("a violation was recorded");
    assert_eq!(violation.byte, b']');
    assert_eq!(violation.values_emitted, 1);
}

#[test]
fn strict_clean_stream_has_no_error() {
    let mut splitter = JsonSplitter::strict();
    let mut emitted = Vec::new();
    splitter.feed(br#"{"a":1}{"b":2}"#, &mut emitted);

    // Well-formed input in strict mode: no violation recorded, `finish`
    // succeeds, and both objects are in the output.
    assert!(splitter.error().is_none());
    assert!(splitter.finish(&mut emitted).is_ok());
    let expected = vec![r#"{"a":1}"#, r#"{"b":2}"#];
    assert_eq!(emitted, expected);
}

#[test]
fn strict_resets_error_after_finish() {
    let mut splitter = JsonSplitter::strict();
    let mut emitted = Vec::new();

    // A lone stray `]` makes `finish` fail ...
    splitter.feed("]".as_bytes(), &mut emitted);
    assert!(splitter.finish(&mut emitted).is_err());
    // ... after which no error is recorded any more, so the same splitter
    // can process a fresh, well-formed stream.
    assert!(splitter.error().is_none());

    splitter.feed(br#"{"ok":true}"#, &mut emitted);
    assert!(splitter.finish(&mut emitted).is_ok());
    assert_eq!(emitted, vec![r#"{"ok":true}"#]);
}