use crate::error::Result;
use rayon::prelude::*;
use serde::de::DeserializeOwned;
/// Deserializes every document of a multi-document stream into a `T`, working
/// on the documents in parallel.
///
/// `input` is first cut into per-document slices by [`split`]; each slice is
/// then handed to `crate::from_str::<T>` on rayon's thread pool. The resulting
/// vector holds one value per slice, in the order the slices appear in `input`.
///
/// # Errors
///
/// Returns an error as soon as any slice fails to deserialize; no partial
/// results are returned. When several slices fail, the error that surfaces is
/// chosen by rayon's `Result` collection and is not guaranteed to be the one
/// that comes first in `input`.
pub fn parse<T>(input: &str) -> Result<Vec<T>>
where
    T: DeserializeOwned + Send + 'static,
{
    // The return type drives `collect`, so the `Result<Vec<T>>` target does
    // not need to be spelled out a second time.
    split(input)
        .into_par_iter()
        .map(|document| crate::from_str::<T>(document))
        .collect()
}
/// Deserializes every document of `input` into a [`crate::Value`].
///
/// This is [`parse`] with the target type fixed to `crate::Value`.
///
/// # Errors
///
/// Returns whatever error [`parse`] reports for `input`.
pub fn values(input: &str) -> Result<Vec<crate::Value>> {
    // `T` is inferred as `crate::Value` from the declared return type.
    parse(input)
}
/// Cuts a YAML stream into one borrowed slice per document.
///
/// A document marker is the text `---` at the start of a line (the start of
/// `input`, or directly after `\n` or `\r`) that is followed by a space, a
/// tab, a line break, or the end of `input`. Every marked document is returned
/// as the slice running from its marker up to (not including) the next marker,
/// so the marker itself is part of the slice.
///
/// Text in front of the first marker is treated as follows:
///
/// - whitespace only: dropped;
/// - only blank lines, comment lines (`# ...`) and directive lines (`%...`):
///   kept attached to the first marked document, because it is that
///   document's prefix rather than a document of its own;
/// - anything else: returned, untrimmed, as an implicit first document.
///
/// A marker at the very end of the stream still produces a final slice, even
/// when nothing but whitespace follows it.
///
/// Returns an empty vector for empty input and a single slice covering all of
/// `input` when it contains no marker.
///
/// The scan is purely textual: any line that starts with a marker is treated
/// as a document boundary without parsing the surrounding YAML.
#[must_use]
pub fn split(input: &str) -> Vec<&str> {
    let bytes = input.as_bytes();
    // Marker offsets always point at an ASCII `-`, so they are valid `str`
    // slice boundaries.
    let markers: Vec<usize> = (0..bytes.len())
        .filter(|&at| is_document_marker(bytes, at))
        .collect();

    let first = match markers.first() {
        Some(&first) => first,
        None => {
            return if input.is_empty() {
                Vec::new()
            } else {
                vec![input]
            };
        }
    };

    let mut docs: Vec<&str> = Vec::with_capacity(markers.len() + 1);
    let preamble = &input[..first];
    let mut start = first;
    if preamble_has_content(preamble) {
        // Real content before the first marker is an implicit first document.
        docs.push(preamble);
    } else if !preamble.trim().is_empty() {
        // Comments and directives belong to the first marked document; keep
        // them attached instead of emitting a bogus comment-only document.
        start = 0;
    }

    for &next in &markers[1..] {
        docs.push(&input[start..next]);
        start = next;
    }
    // The last slice always starts with a marker (or the attached prefix), so
    // it is never empty.
    docs.push(&input[start..]);
    docs
}

/// Reports whether a document marker (`---` at the start of a line, followed
/// by whitespace, a line break, or the end of input) begins at offset `at`.
///
/// `at` must be less than `bytes.len()`.
fn is_document_marker(bytes: &[u8], at: usize) -> bool {
    let at_line_start = at == 0 || matches!(bytes[at - 1], b'\n' | b'\r');
    at_line_start
        && bytes[at..].starts_with(b"---")
        && bytes
            .get(at + 3)
            .map_or(true, |&next| matches!(next, b'\n' | b'\r' | b' ' | b'\t'))
}

/// Reports whether `preamble` holds anything besides blank lines, comment
/// lines (first non-blank character `#`) and directive lines (`%` in column 0).
fn preamble_has_content(preamble: &str) -> bool {
    // Split on both `\n` and `\r` so the line model matches the marker scan.
    preamble.split(&['\n', '\r'][..]).any(|line| {
        let text = line.trim_start();
        !(text.is_empty() || text.starts_with('#') || line.starts_with('%'))
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// Three explicit markers give three slices, in input order.
    #[test]
    fn split_separates_three_records() {
        let parts = split("---\nid: 1\n---\nid: 2\n---\nid: 3\n");
        assert_eq!(parts.len(), 3);
        for (part, needle) in parts.iter().zip(["id: 1", "id: 2", "id: 3"]) {
            assert!(part.contains(needle));
        }
    }

    /// Input without any marker comes back as one untouched slice.
    #[test]
    fn split_handles_no_separators() {
        let source = "single: doc\n";
        assert_eq!(split(source), vec![source]);
    }

    /// Empty input produces no slices at all.
    #[test]
    fn split_handles_empty_input() {
        assert_eq!(split("").len(), 0);
    }

    /// Content ahead of the first marker counts as a document of its own.
    #[test]
    fn split_handles_implicit_first_doc() {
        let parts = split("name: a\n---\nname: b\n");
        assert_eq!(parts.len(), 2);
        let (head, tail) = (parts[0], parts[1]);
        assert!(head.contains("name: a"));
        assert!(tail.contains("name: b"));
    }

    /// `---` in the middle of a line is not a document boundary.
    #[test]
    fn split_ignores_dashes_mid_line() {
        let parts = split("key: value---suffix\n");
        assert_eq!(parts.len(), 1);
        assert!(parts[0].contains("value---suffix"));
    }

    /// `---` glued to following text at line start is not a boundary either.
    #[test]
    fn split_requires_post_marker_whitespace() {
        let docs = split("key: a\n---foo\nkey: b\n");
        assert_eq!(docs.len(), 1, "got: {docs:?}");
    }

    /// Typed parsing yields one record per document, in input order.
    #[test]
    fn parse_round_trips_typed_records() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Record {
            id: u32,
        }

        let parsed = parse::<Record>("---\nid: 1\n---\nid: 2\n---\nid: 3\n").unwrap();
        let expected: Vec<Record> = (1..=3).map(|id| Record { id }).collect();
        assert_eq!(parsed, expected);
    }

    /// `values` returns one dynamically typed value per document.
    #[test]
    fn values_yields_value_per_document() {
        let parsed = values("---\na: 1\n---\nb: 2\n").unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0]["a"].as_i64(), Some(1));
        assert_eq!(parsed[1]["b"].as_i64(), Some(2));
    }

    /// A malformed document makes the whole parse fail.
    #[test]
    fn parse_propagates_first_error() {
        // The field is only ever written by the deserializer, never read.
        #[derive(Debug, Deserialize)]
        #[allow(dead_code)]
        struct Record {
            id: u32,
        }

        let outcome = parse::<Record>("---\nid: 1\n---\nid: [\n");
        assert!(outcome.is_err());
    }

    /// The parallel path agrees with the crate's sequential loader.
    #[test]
    fn parse_matches_sequential_for_correctness() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Record {
            id: u32,
            name: String,
        }

        let stream: String = (0..50)
            .map(|i| format!("---\nid: {i}\nname: record-{i}\n"))
            .collect();

        let from_parallel: Vec<Record> = parse(&stream).unwrap();
        let from_sequential: Vec<Record> = crate::load_all_as(&stream).unwrap();
        assert_eq!(from_parallel, from_sequential);
    }
}