use serde::Serialize;
use serde_json::Value as JsonValue;
use serde_yml::Value as YmlValue;
use std::collections::HashMap;
use toml::Value as TomlValue;
use crate::{
error::FrontmatterError, types::Frontmatter, Format, Value,
};
/// Byte-length threshold used by `optimize_string` to pick its copy strategy.
const SMALL_STRING_SIZE: usize = 24;
/// Default nesting-depth limit applied by `ParseOptions::default`.
const MAX_NESTING_DEPTH: usize = 32;
/// Default limit on the number of top-level keys applied by `ParseOptions::default`.
const MAX_KEYS: usize = 1000;
/// Options controlling the validation step that `parse_with_options` runs
/// after a document has been parsed.
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
    /// Maximum nesting depth accepted when validation runs.
    pub max_depth: usize,
    /// Maximum number of top-level keys accepted when validation runs.
    pub max_keys: usize,
    /// Whether the parsed frontmatter is validated against the limits at all.
    pub validate: bool,
}
impl Default for ParseOptions {
fn default() -> Self {
Self {
max_depth: MAX_NESTING_DEPTH,
max_keys: MAX_KEYS,
validate: true,
}
}
}
/// Copies `s` into an owned `String`.
///
/// Strings longer than `SMALL_STRING_SIZE` bytes are copied into a buffer
/// pre-sized to their exact length; shorter ones go through `to_string`.
#[inline]
fn optimize_string(s: &str) -> String {
    let byte_len = s.len();
    if byte_len > SMALL_STRING_SIZE {
        let mut owned = String::with_capacity(byte_len);
        owned.push_str(s);
        return owned;
    }
    s.to_string()
}
/// Parses raw frontmatter text in the given `format`, then optionally
/// validates the result.
///
/// # Arguments
///
/// * `raw_frontmatter` - The frontmatter text to parse.
/// * `format` - The format the text is expected to be in.
/// * `options` - Validation settings; `None` uses `ParseOptions::default()`.
///
/// # Errors
///
/// * `FrontmatterError::ConversionError` when `format` is TOML and the input
///   contains no `=`, when `format` is JSON and the input does not start with
///   `{`, or when `format` is `Format::Unsupported`.
/// * Whatever error the format-specific parser returns for malformed input.
/// * Whatever error validation returns when `options.validate` is set and a
///   limit is exceeded.
pub fn parse_with_options(
    raw_frontmatter: &str,
    format: Format,
    options: Option<ParseOptions>,
) -> Result<Frontmatter, FrontmatterError> {
    let options = options.unwrap_or_default();

    // Cheap sanity checks of the declared format against the raw input.
    // The YAML check only warns; the condition is negated so that the warning
    // fires when the input really does NOT start with '---', matching both
    // its message and the sibling TOML/JSON checks below.
    if format == Format::Yaml && !raw_frontmatter.starts_with("---") {
        eprintln!("Warning: Format set to YAML but input does not start with '---'");
    }
    if format == Format::Toml && !raw_frontmatter.contains('=') {
        return Err(FrontmatterError::ConversionError(
            "Format set to TOML but input does not contain '=' signs."
                .to_string(),
        ));
    }
    if format == Format::Json && !raw_frontmatter.starts_with('{') {
        return Err(FrontmatterError::ConversionError(
            "Format set to JSON but input does not start with '{'."
                .to_string(),
        ));
    }

    // Dispatch to the format-specific parser, logging any failure to stderr
    // before propagating it unchanged.
    let frontmatter = match format {
        Format::Yaml => parse_yaml(raw_frontmatter).map_err(|e| {
            eprintln!("YAML parsing failed: {}", e);
            e
        })?,
        Format::Toml => parse_toml(raw_frontmatter).map_err(|e| {
            eprintln!("TOML parsing failed: {}", e);
            e
        })?,
        Format::Json => parse_json(raw_frontmatter).map_err(|e| {
            eprintln!("JSON parsing failed: {}", e);
            e
        })?,
        Format::Unsupported => {
            let err_msg = "Unsupported format provided".to_string();
            eprintln!("{}", err_msg);
            return Err(FrontmatterError::ConversionError(err_msg));
        }
    };

    if options.validate {
        // NOTE(review): this progress message goes to stdout while every other
        // diagnostic here goes to stderr; left unchanged to preserve output.
        println!(
            "Validating frontmatter with max_depth={} and max_keys={}",
            options.max_depth, options.max_keys
        );
        validate_frontmatter(
            &frontmatter,
            options.max_depth,
            options.max_keys,
        )
        .map_err(|e| {
            eprintln!("Validation failed: {}", e);
            e
        })?;
    }

    Ok(frontmatter)
}
pub fn parse(
raw_frontmatter: &str,
format: Format,
) -> Result<Frontmatter, FrontmatterError> {
parse_with_options(raw_frontmatter, format, None)
}
/// Serializes `frontmatter` into text in the requested `format`.
///
/// # Errors
///
/// Returns `FrontmatterError::ConversionError` for `Format::Unsupported`, and
/// otherwise propagates any error from the format-specific serializer.
pub fn to_string(
    frontmatter: &Frontmatter,
    format: Format,
) -> Result<String, FrontmatterError> {
    let rendered = match format {
        Format::Unsupported => {
            return Err(FrontmatterError::ConversionError(
                "Unsupported format".to_string(),
            ));
        }
        Format::Json => to_json_optimized(frontmatter)?,
        Format::Toml => to_toml(frontmatter)?,
        Format::Yaml => to_yaml(frontmatter)?,
    };
    Ok(rendered)
}
/// Parses YAML text into a `Frontmatter`.
///
/// The document root must be a mapping. Entries whose key is not a string are
/// skipped with a warning on stderr.
///
/// # Errors
///
/// * `FrontmatterError::YamlParseError` when the text is not valid YAML.
/// * `FrontmatterError::ParseError` when the root is not a mapping.
fn parse_yaml(raw: &str) -> Result<Frontmatter, FrontmatterError> {
    let document: YmlValue = match serde_yml::from_str(raw) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(FrontmatterError::YamlParseError { source: e });
        }
    };

    let entries = match document {
        YmlValue::Mapping(entries) => entries,
        _ => {
            return Err(FrontmatterError::ParseError(
                "YAML frontmatter is not a valid mapping".to_string(),
            ));
        }
    };

    let mut frontmatter =
        Frontmatter(HashMap::with_capacity(entries.len()));
    for (raw_key, raw_value) in entries {
        match raw_key {
            YmlValue::String(name) => {
                let _ = frontmatter.insert(name, yml_to_value(&raw_value));
            }
            _ => {
                eprintln!("Warning: Non-string key ignored in YAML frontmatter");
            }
        }
    }
    Ok(frontmatter)
}
/// Recursively converts a `serde_yml` value into the crate's `Value`.
///
/// Numbers become `f64` (integers via an `as` cast); a number readable as
/// neither `i64` nor `f64` becomes `0.0`. Nested mapping entries with
/// non-string keys are silently dropped.
fn yml_to_value(yml: &YmlValue) -> Value {
    match yml {
        YmlValue::Null => Value::Null,
        YmlValue::Bool(flag) => Value::Boolean(*flag),
        YmlValue::String(text) => Value::String(optimize_string(text)),
        YmlValue::Number(num) => {
            // Prefer the integer reading, then the float reading, then zero.
            let numeric = num
                .as_i64()
                .map(|int| int as f64)
                .or_else(|| num.as_f64())
                .unwrap_or(0.0);
            Value::Number(numeric)
        }
        YmlValue::Sequence(items) => {
            let mut converted = Vec::with_capacity(items.len());
            for item in items {
                converted.push(yml_to_value(item));
            }
            Value::Array(converted)
        }
        YmlValue::Mapping(entries) => {
            let mut fields = HashMap::with_capacity(entries.len());
            for (raw_key, raw_value) in entries {
                if let YmlValue::String(name) = raw_key {
                    let _ = fields.insert(
                        optimize_string(name),
                        yml_to_value(raw_value),
                    );
                }
            }
            Value::Object(Box::new(Frontmatter(fields)))
        }
        YmlValue::Tagged(tagged) => {
            let tag_name = optimize_string(&tagged.tag.to_string());
            let inner = yml_to_value(&tagged.value);
            Value::Tagged(tag_name, Box::new(inner))
        }
    }
}
/// Serializes the frontmatter's underlying map as YAML text.
///
/// # Errors
///
/// Returns `FrontmatterError::ConversionError` carrying the serializer's
/// message when serialization fails.
fn to_yaml(
    frontmatter: &Frontmatter,
) -> Result<String, FrontmatterError> {
    match serde_yml::to_string(&frontmatter.0) {
        Ok(yaml) => Ok(yaml),
        Err(e) => Err(FrontmatterError::ConversionError(e.to_string())),
    }
}
/// Parses TOML text into a `Frontmatter`.
///
/// When the parsed root is a table, each entry is converted with
/// `toml_to_value`; any other root yields an empty `Frontmatter`.
///
/// # Errors
///
/// Returns `FrontmatterError::TomlParseError` when the text is not valid TOML.
fn parse_toml(raw: &str) -> Result<Frontmatter, FrontmatterError> {
    let document = raw
        .parse::<TomlValue>()
        .map_err(FrontmatterError::TomlParseError)?;

    let frontmatter = match document {
        TomlValue::Table(table) => {
            let mut filled =
                Frontmatter(HashMap::with_capacity(table.len()));
            for (name, item) in table {
                let _ = filled.0.insert(name, toml_to_value(&item));
            }
            filled
        }
        _ => Frontmatter(HashMap::with_capacity(0)),
    };
    Ok(frontmatter)
}
/// Recursively converts a `toml` value into the crate's `Value`.
///
/// Integers are cast to `f64`, and datetimes are stored as their string
/// rendering.
fn toml_to_value(toml: &TomlValue) -> Value {
    match toml {
        TomlValue::Boolean(flag) => Value::Boolean(*flag),
        TomlValue::Integer(int) => Value::Number(*int as f64),
        TomlValue::Float(float) => Value::Number(*float),
        TomlValue::String(text) => Value::String(optimize_string(text)),
        TomlValue::Datetime(stamp) => Value::String(stamp.to_string()),
        TomlValue::Array(items) => {
            let mut converted = Vec::with_capacity(items.len());
            for item in items {
                converted.push(toml_to_value(item));
            }
            Value::Array(converted)
        }
        TomlValue::Table(table) => {
            let mut fields = HashMap::with_capacity(table.len());
            for (name, item) in table {
                let _ =
                    fields.insert(optimize_string(name), toml_to_value(item));
            }
            Value::Object(Box::new(Frontmatter(fields)))
        }
    }
}
/// Serializes the frontmatter's underlying map as TOML text.
///
/// # Errors
///
/// Returns `FrontmatterError::ConversionError` carrying the serializer's
/// message when serialization fails.
fn to_toml(
    frontmatter: &Frontmatter,
) -> Result<String, FrontmatterError> {
    match toml::to_string(&frontmatter.0) {
        Ok(rendered) => Ok(rendered),
        Err(e) => Err(FrontmatterError::ConversionError(e.to_string())),
    }
}
/// Parses JSON text into a `Frontmatter`.
///
/// When the parsed root is an object, each member is converted with
/// `json_to_value`; any other root yields an empty `Frontmatter`.
///
/// # Errors
///
/// Returns `FrontmatterError::JsonParseError` when the text is not valid JSON.
fn parse_json(raw: &str) -> Result<Frontmatter, FrontmatterError> {
    let document: JsonValue = match serde_json::from_str(raw) {
        Ok(parsed) => parsed,
        Err(e) => return Err(FrontmatterError::JsonParseError(e)),
    };

    let frontmatter = match document {
        JsonValue::Object(members) => {
            let mut filled =
                Frontmatter(HashMap::with_capacity(members.len()));
            for (name, member) in members {
                let _ = filled.0.insert(name, json_to_value(&member));
            }
            filled
        }
        _ => Frontmatter(HashMap::with_capacity(0)),
    };
    Ok(frontmatter)
}
/// Recursively converts a `serde_json` value into the crate's `Value`.
///
/// Numbers become `f64` (integers via an `as` cast); a number readable as
/// neither `i64` nor `f64` becomes `0.0`.
fn json_to_value(json: &JsonValue) -> Value {
    match json {
        JsonValue::Null => Value::Null,
        JsonValue::Bool(flag) => Value::Boolean(*flag),
        JsonValue::String(text) => Value::String(optimize_string(text)),
        JsonValue::Number(num) => {
            // Prefer the integer reading, then the float reading, then zero.
            let numeric = num
                .as_i64()
                .map(|int| int as f64)
                .or_else(|| num.as_f64())
                .unwrap_or(0.0);
            Value::Number(numeric)
        }
        JsonValue::Array(items) => {
            let mut converted = Vec::with_capacity(items.len());
            for item in items {
                converted.push(json_to_value(item));
            }
            Value::Array(converted)
        }
        JsonValue::Object(members) => {
            let mut fields = HashMap::with_capacity(members.len());
            for (name, member) in members {
                let _ = fields
                    .insert(optimize_string(name), json_to_value(member));
            }
            Value::Object(Box::new(Frontmatter(fields)))
        }
    }
}
/// Serializes the frontmatter's underlying map as compact JSON, writing into
/// a byte buffer pre-sized from `estimate_json_size`.
///
/// # Errors
///
/// Returns `FrontmatterError::ConversionError` when serialization fails or
/// when the produced bytes are not valid UTF-8.
fn to_json_optimized(
    frontmatter: &Frontmatter,
) -> Result<String, FrontmatterError> {
    let buffer = Vec::with_capacity(estimate_json_size(frontmatter));
    let mut serializer = serde_json::Serializer::with_formatter(
        buffer,
        serde_json::ser::CompactFormatter,
    );

    if let Err(e) = frontmatter.0.serialize(&mut serializer) {
        return Err(FrontmatterError::ConversionError(e.to_string()));
    }

    match String::from_utf8(serializer.into_inner()) {
        Ok(json) => Ok(json),
        Err(e) => Err(FrontmatterError::ConversionError(e.to_string())),
    }
}
/// Checks a parsed frontmatter against the key-count and nesting limits.
///
/// Only the top-level key count is compared with `max_keys`. Each top-level
/// value is then depth-checked starting from depth 0.
///
/// # Errors
///
/// * `FrontmatterError::ContentTooLarge` when there are more than `max_keys`
///   top-level keys.
/// * Any error returned by `check_depth` for a value nested too deeply.
fn validate_frontmatter(
    fm: &Frontmatter,
    max_depth: usize,
    max_keys: usize,
) -> Result<(), FrontmatterError> {
    let key_count = fm.0.len();
    if key_count > max_keys {
        return Err(FrontmatterError::ContentTooLarge {
            size: key_count,
            max: max_keys,
        });
    }

    // Stops at the first value that exceeds the depth limit.
    fm.0.values()
        .try_for_each(|top_level| check_depth(top_level, 0, max_depth))
}
/// Recursively verifies that `value` and everything nested inside it stays
/// within `max_depth`.
///
/// Array items and object values are checked one level deeper than their
/// container. Other variants are not descended into.
///
/// # Errors
///
/// Returns `FrontmatterError::NestingTooDeep` as soon as `current_depth`
/// exceeds `max_depth`.
fn check_depth(
    value: &Value,
    current_depth: usize,
    max_depth: usize,
) -> Result<(), FrontmatterError> {
    if current_depth > max_depth {
        return Err(FrontmatterError::NestingTooDeep {
            depth: current_depth,
            max: max_depth,
        });
    }

    match value {
        Value::Object(fields) => fields.0.values().try_for_each(|child| {
            check_depth(child, current_depth + 1, max_depth)
        }),
        Value::Array(items) => items.iter().try_for_each(|child| {
            check_depth(child, current_depth + 1, max_depth)
        }),
        _ => Ok(()),
    }
}
/// Estimates the byte length of the compact JSON rendering of `fm`.
///
/// Counts 2 bytes for the surrounding braces and, per entry, the key length
/// plus 3 (two quotes and a colon), the estimated value size, and 1 for a
/// separating comma.
fn estimate_json_size(fm: &Frontmatter) -> usize {
    let entries_total: usize = fm
        .0
        .iter()
        .map(|(key, value)| key.len() + 3 + estimate_value_size(value) + 1)
        .sum();
    2 + entries_total
}
/// Estimates the byte length of the JSON rendering of a single `value`.
///
/// The figures are rough per-variant guesses; array separators are not
/// counted.
fn estimate_value_size(value: &Value) -> usize {
    match value {
        // Length of the literal `null`.
        Value::Null => 4,
        // Length of the longer boolean literal, `false`.
        Value::Boolean(_) => 5,
        // Flat allowance for a number, regardless of its digits.
        Value::Number(_) => 8,
        // Text plus the two surrounding quotes.
        Value::String(text) => text.len() + 2,
        Value::Array(items) => {
            // Two brackets plus the estimated size of every element.
            let mut total = 2;
            for item in items {
                total += estimate_value_size(item);
            }
            total
        }
        Value::Object(fields) => estimate_json_size(fields),
        Value::Tagged(tag, inner) => {
            tag.len() + 2 + estimate_value_size(inner)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds a small frontmatter holding one string, one number, one boolean
    /// and one numeric array, shared by several tests below.
    fn create_test_frontmatter() -> Frontmatter {
        let mut fm = Frontmatter::new();
        let _ = fm.insert(
            "string".to_string(),
            Value::String("test".to_string()),
        );
        let _ = fm.insert("number".to_string(), Value::Number(PI));
        let _ = fm.insert("boolean".to_string(), Value::Boolean(true));
        let _ = fm.insert(
            "array".to_string(),
            Value::Array(vec![
                Value::Number(1.0),
                Value::Number(2.0),
                Value::Number(3.0),
            ]),
        );
        fm
    }

    /// `optimize_string` must return equal content on both sides of the
    /// `SMALL_STRING_SIZE` threshold.
    #[test]
    fn test_string_optimization() {
        let short_str = "short";
        let long_str = "a".repeat(SMALL_STRING_SIZE + 1);
        let optimized_short = optimize_string(short_str);
        let optimized_long = optimize_string(&long_str);
        assert_eq!(optimized_short, short_str);
        assert_eq!(optimized_long, long_str);
        assert!(optimized_long.capacity() >= long_str.len());
    }

    /// Validation must reject both too many keys and too much nesting.
    #[test]
    fn test_validation() {
        // One key more than the limit.
        let mut large_fm = Frontmatter::new();
        for i in 0..MAX_KEYS + 1 {
            let _ = large_fm.insert(
                i.to_string(),
                Value::String("value".to_string()),
            );
        }
        assert!(validate_frontmatter(
            &large_fm,
            MAX_NESTING_DEPTH,
            MAX_KEYS
        )
        .is_err());

        // One object level more than the limit, built inside-out.
        let mut nested_fm = Frontmatter::new();
        let mut current = Value::Null;
        for _ in 0..MAX_NESTING_DEPTH + 1 {
            current = Value::Object(Box::new(Frontmatter(
                [("nested".to_string(), current)].into_iter().collect(),
            )));
        }
        let _ = nested_fm.insert("deep".to_string(), current);
        assert!(validate_frontmatter(
            &nested_fm,
            MAX_NESTING_DEPTH,
            MAX_KEYS
        )
        .is_err());
    }

    /// Serializing and re-parsing must reproduce the original frontmatter in
    /// every supported format.
    #[test]
    fn test_format_roundtrip() {
        let original = create_test_frontmatter();

        let yaml = to_string(&original, Format::Yaml).unwrap();
        let from_yaml = parse(&yaml, Format::Yaml).unwrap();
        assert_eq!(original, from_yaml);

        let toml = to_string(&original, Format::Toml).unwrap();
        let from_toml = parse(&toml, Format::Toml).unwrap();
        assert_eq!(original, from_toml);

        let json = to_string(&original, Format::Json).unwrap();
        let from_json = parse(&json, Format::Json).unwrap();
        assert_eq!(original, from_json);
    }

    /// The default options accept a four-level document, while a
    /// `max_depth` of 2 rejects it.
    #[test]
    fn test_parse_options() {
        // The indentation inside this literal is significant: it is what makes
        // the mappings nested. The content is kept flush-left so the nesting
        // does not depend on how the surrounding Rust code is indented.
        let yaml = r#"
nested:
  level1:
    level2:
      value: test
"#;
        assert!(parse_with_options(yaml, Format::Yaml, None).is_ok());

        let restricted_options = ParseOptions {
            max_depth: 2,
            max_keys: MAX_KEYS,
            validate: true,
        };
        assert!(parse_with_options(
            yaml,
            Format::Yaml,
            Some(restricted_options)
        )
        .is_err());
    }

    /// Malformed input must surface the parse error variant of its format.
    #[test]
    fn test_error_handling() {
        let invalid_yaml = "test: : invalid";
        assert!(matches!(
            parse(invalid_yaml, Format::Yaml),
            Err(FrontmatterError::YamlParseError { .. })
        ));

        let invalid_toml = "test = = invalid";
        assert!(matches!(
            parse(invalid_toml, Format::Toml),
            Err(FrontmatterError::TomlParseError(_))
        ));

        let invalid_json = "{invalid}";
        assert!(matches!(
            parse(invalid_json, Format::Json),
            Err(FrontmatterError::JsonParseError(_))
        ));
    }

    /// The size estimate must be at least the real JSON length and at most
    /// twice that length.
    #[test]
    fn test_size_estimation() {
        let fm = create_test_frontmatter();
        let estimated_size = estimate_json_size(&fm);
        let actual_json = to_string(&fm, Format::Json).unwrap();
        assert!(estimated_size >= actual_json.len());
        assert!(estimated_size <= actual_json.len() * 2);
    }
}