use serde::Serialize;
use serde_json::Value as JsonValue;
use serde_yml::Value as YamlValue;
use std::{collections::HashMap, sync::Arc};
use toml::Value as TomlValue;
use crate::{error::Error, types::Frontmatter, Format, Value};
/// Byte-length threshold at which `optimise_string` switches between its two copy paths.
const SMALL_STRING_SIZE: usize = 24;
/// Default for `ParseOptions::max_depth`.
const MAX_NESTING_DEPTH: usize = 32;
/// Default for `ParseOptions::max_keys`.
const MAX_KEYS: usize = 1000;
/// Limits and switches consulted by `parse_with_options` once a document has been parsed.
#[derive(Clone, Copy, Debug)]
pub struct ParseOptions {
    /// Maximum nesting depth passed to the validation step.
    pub max_depth: usize,
    /// Maximum number of top-level keys passed to the validation step.
    pub max_keys: usize,
    /// When `false`, the validation step is skipped altogether.
    pub validate: bool,
}
impl Default for ParseOptions {
fn default() -> Self {
Self {
max_depth: MAX_NESTING_DEPTH,
max_keys: MAX_KEYS,
validate: true,
}
}
}
/// Returns an owned copy of `s`.
///
/// Inputs longer than `SMALL_STRING_SIZE` bytes are copied into a buffer that is
/// pre-sized to `s.len()`; shorter inputs use the plain owned conversion.
#[inline]
fn optimise_string(s: &str) -> String {
    let byte_len = s.len();
    if byte_len > SMALL_STRING_SIZE {
        let mut owned = String::with_capacity(byte_len);
        owned.push_str(s);
        return owned;
    }
    s.to_owned()
}
pub fn parse_with_options(
raw_front_matter: &str,
format: Format,
options: Option<ParseOptions>,
) -> Result<Frontmatter, Error> {
let options = options.unwrap_or_default();
if format == Format::Unsupported {
let err_msg = format!(
"Unsupported format: {:?}. Supported formats are YAML, TOML, and JSON.",
format
);
log::error!("{}", err_msg);
return Err(Error::ConversionError(err_msg));
}
let trimmed_content = raw_front_matter.trim();
match format {
Format::Yaml => {
if !trimmed_content.starts_with("---") {
log::debug!("YAML front matter validation: Content structure appears non-standard");
}
}
Format::Toml => {
if !trimmed_content.contains('=') {
return Err(Error::ConversionError(
"Format set to TOML but input does not contain '=' signs.".to_string(),
));
}
}
Format::Json => {
if !trimmed_content.starts_with('{') {
return Err(Error::ConversionError(
"Format set to JSON but input does not start with '{'."
.to_string(),
));
}
}
Format::Unsupported => unreachable!(), };
let front_matter = match format {
Format::Yaml => parse_yaml(trimmed_content).map_err(|e| {
log::error!("YAML parsing failed: {}", e);
e
})?,
Format::Toml => parse_toml(trimmed_content).map_err(|e| {
log::error!("TOML parsing failed: {}", e);
e
})?,
Format::Json => parse_json(trimmed_content).map_err(|e| {
log::error!("JSON parsing failed: {}", e);
e
})?,
Format::Unsupported => unreachable!(),
};
if options.validate {
log::debug!(
"Validating front matter: maximum allowed nesting depth is {}, maximum allowed number of keys is {}",
options.max_depth,
options.max_keys
);
validate_frontmatter(
&front_matter,
options.max_depth,
options.max_keys,
)
.map_err(|e| {
log::error!("Front matter validation failed: {}", e);
e
})?;
}
Ok(front_matter)
}
pub fn parse(
raw_front_matter: &str,
format: Format,
) -> Result<Frontmatter, Error> {
parse_with_options(raw_front_matter, format, None)
}
pub fn to_string(
front_matter: &Frontmatter,
format: Format,
) -> Result<String, Error> {
match format {
Format::Yaml => to_yaml(front_matter),
Format::Toml => to_toml(front_matter),
Format::Json => to_json_optimised(front_matter),
Format::Unsupported => Err(Error::ConversionError(
"Unsupported format".to_string(),
)),
}
}
fn parse_yaml(raw: &str) -> Result<Frontmatter, Error> {
let yaml_value: YamlValue = serde_yml::from_str(raw)
.map_err(|e| Error::YamlParseError { source: e.into() })?;
let capacity =
yaml_value.as_mapping().map_or(0, serde_yml::Mapping::len);
let mut front_matter =
Frontmatter(HashMap::with_capacity(capacity));
if let YamlValue::Mapping(mapping) = yaml_value {
for (key, value) in mapping {
if let YamlValue::String(k) = key {
let _ = front_matter.insert(k, yaml_to_value(&value));
} else {
log::warn!("Warning: Non-string key ignored in YAML front matter");
}
}
} else {
return Err(Error::ParseError(
"YAML front matter is not a valid mapping".to_string(),
));
}
Ok(front_matter)
}
/// Converts a `serde_yml` value into the crate's `Value` representation.
///
/// * Integers whose magnitude is below `2^52` become `Value::Number`; larger
///   integers are logged and replaced by `0.0`.
/// * Numbers that are neither `i64` nor `f64` are logged and replaced by `0.0`.
/// * Sequences and mappings are converted recursively; mapping entries with
///   non-string keys are skipped with a warning.
/// * Tagged values keep their tag (as a string) alongside the converted payload.
fn yaml_to_value(yaml: &YamlValue) -> Value {
    match yaml {
        YamlValue::Null => Value::Null,
        YamlValue::Bool(b) => Value::Boolean(*b),
        YamlValue::Number(n) => {
            if let Some(i) = n.as_i64() {
                // `unsigned_abs` is used instead of `abs` because `i64::MIN.abs()`
                // overflows: it panics in debug builds and wraps to a negative value
                // in release builds, which would wrongly pass the magnitude check.
                if i.unsigned_abs() < (1_u64 << 52) {
                    Value::Number(i as f64)
                } else {
                    log::warn!(
                        "Integer {} exceeds precision of f64. Defaulting to 0.0",
                        i
                    );
                    Value::Number(0.0)
                }
            } else if let Some(f) = n.as_f64() {
                Value::Number(f)
            } else {
                log::warn!(
                    "Invalid or unsupported number encountered in YAML: {:?}",
                    n
                );
                Value::Number(0.0)
            }
        }
        YamlValue::String(s) => Value::String(optimise_string(s)),
        YamlValue::Sequence(seq) => {
            let mut vec = Vec::with_capacity(seq.len());
            vec.extend(seq.iter().map(yaml_to_value));
            Value::Array(vec)
        }
        YamlValue::Mapping(map) => {
            let mut result = Frontmatter(HashMap::with_capacity(map.len()));
            for (k, v) in map {
                if let YamlValue::String(key) = k {
                    let _ = result
                        .0
                        .insert(optimise_string(key), yaml_to_value(v));
                } else {
                    log::warn!(
                        "Non-string key in YAML mapping ignored: {:?}",
                        k
                    );
                }
            }
            Value::Object(Box::new(result))
        }
        YamlValue::Tagged(tagged) => Value::Tagged(
            optimise_string(&tagged.tag.to_string()),
            Box::new(yaml_to_value(&tagged.value)),
        ),
    }
}
fn to_yaml(front_matter: &Frontmatter) -> Result<String, Error> {
serde_yml::to_string(&front_matter.0)
.map_err(|e| Error::ConversionError(e.to_string()))
}
fn parse_toml(raw: &str) -> Result<Frontmatter, Error> {
let toml_value: TomlValue =
raw.parse().map_err(Error::TomlParseError)?;
let capacity = match &toml_value {
TomlValue::Table(table) => table.len(),
_ => 0,
};
let mut front_matter =
Frontmatter(HashMap::with_capacity(capacity));
if let TomlValue::Table(table) = toml_value {
for (key, value) in table {
let _ = front_matter.0.insert(key, toml_to_value(&value));
}
}
Ok(front_matter)
}
/// Converts a `toml` value into the crate's `Value` representation.
///
/// Integers and floats both become `Value::Number`, datetimes become their
/// string form, and arrays and tables are converted recursively.
fn toml_to_value(toml: &TomlValue) -> Value {
    match toml {
        TomlValue::Boolean(flag) => Value::Boolean(*flag),
        TomlValue::Integer(int) => Value::Number(*int as f64),
        TomlValue::Float(float) => Value::Number(*float),
        TomlValue::String(text) => Value::String(optimise_string(text)),
        TomlValue::Datetime(stamp) => Value::String(stamp.to_string()),
        TomlValue::Array(items) => {
            Value::Array(items.iter().map(toml_to_value).collect())
        }
        TomlValue::Table(table) => {
            let mut fields = HashMap::with_capacity(table.len());
            fields.extend(
                table
                    .iter()
                    .map(|(name, item)| (optimise_string(name), toml_to_value(item))),
            );
            Value::Object(Box::new(Frontmatter(fields)))
        }
    }
}
fn to_toml(front_matter: &Frontmatter) -> Result<String, Error> {
toml::to_string(&front_matter.0)
.map_err(|e| Error::ConversionError(e.to_string()))
}
fn parse_json(raw: &str) -> Result<Frontmatter, Error> {
let json_value: JsonValue = serde_json::from_str(raw)
.map_err(|e| Error::JsonParseError(Arc::new(e)))?;
let capacity = match &json_value {
JsonValue::Object(obj) => obj.len(),
_ => 0,
};
let mut front_matter =
Frontmatter(HashMap::with_capacity(capacity));
if let JsonValue::Object(obj) = json_value {
for (key, value) in obj {
let _ = front_matter.0.insert(key, json_to_value(&value));
}
}
Ok(front_matter)
}
/// Converts a `serde_json` value into the crate's `Value` representation.
///
/// Numbers are read as `i64` first, then as `f64`; if neither applies the
/// result is `0.0`. Arrays and objects are converted recursively.
fn json_to_value(json: &JsonValue) -> Value {
    match json {
        JsonValue::Null => Value::Null,
        JsonValue::Bool(flag) => Value::Boolean(*flag),
        JsonValue::String(text) => Value::String(optimise_string(text)),
        JsonValue::Number(num) => {
            let as_float = num
                .as_i64()
                .map(|int| int as f64)
                .or_else(|| num.as_f64())
                .unwrap_or(0.0);
            Value::Number(as_float)
        }
        JsonValue::Array(items) => {
            Value::Array(items.iter().map(json_to_value).collect())
        }
        JsonValue::Object(fields) => {
            let mut nested = Frontmatter(HashMap::with_capacity(fields.len()));
            for (name, item) in fields {
                let _ = nested.0.insert(optimise_string(name), json_to_value(item));
            }
            Value::Object(Box::new(nested))
        }
    }
}
fn to_json_optimised(
front_matter: &Frontmatter,
) -> Result<String, Error> {
let estimated_size = estimate_json_size(front_matter);
let buf = Vec::with_capacity(estimated_size);
let formatter = serde_json::ser::CompactFormatter;
let mut ser =
serde_json::Serializer::with_formatter(buf, formatter);
front_matter
.0
.serialize(&mut ser)
.map_err(|e| Error::ConversionError(e.to_string()))?;
String::from_utf8(ser.into_inner())
.map_err(|e| Error::ConversionError(e.to_string()))
}
/// Checks a parsed `Frontmatter` against size and nesting limits.
///
/// Only the number of top-level keys is compared with `max_keys`; every
/// top-level value is then depth-checked starting at depth 1.
///
/// # Errors
///
/// * `Error::ContentTooLarge` when there are more than `max_keys` top-level keys.
/// * `Error::NestingTooDeep` when any value nests deeper than `max_depth`.
pub fn validate_frontmatter(
    fm: &Frontmatter,
    max_depth: usize,
    max_keys: usize,
) -> Result<(), Error> {
    let key_count = fm.0.len();
    if key_count > max_keys {
        return Err(Error::ContentTooLarge {
            size: key_count,
            max: max_keys,
        });
    }

    fm.0
        .values()
        .try_for_each(|value| check_depth(value, 1, max_depth))
}
/// Recursively verifies that `value` does not nest deeper than `max_depth`.
///
/// `current_depth` is the depth of `value` itself. Array elements and object
/// members are checked at `current_depth + 1`. A tagged value is treated as a
/// transparent wrapper: its payload is checked at the same depth, so nesting
/// hidden behind a tag still counts towards the limit.
///
/// # Errors
///
/// Returns `Error::NestingTooDeep` as soon as a value is found at a depth
/// greater than `max_depth`.
fn check_depth(
    value: &Value,
    current_depth: usize,
    max_depth: usize,
) -> Result<(), Error> {
    if current_depth > max_depth {
        return Err(Error::NestingTooDeep {
            depth: current_depth,
            max: max_depth,
        });
    }

    match value {
        Value::Array(items) => items
            .iter()
            .try_for_each(|item| check_depth(item, current_depth + 1, max_depth)),
        Value::Object(obj) => obj
            .0
            .values()
            .try_for_each(|member| check_depth(member, current_depth + 1, max_depth)),
        // Previously skipped by a catch-all arm, which let tagged containers
        // bypass the depth limit.
        Value::Tagged(_, inner) => check_depth(inner, current_depth, max_depth),
        // Scalars cannot nest any further.
        Value::Null | Value::String(_) | Value::Number(_) | Value::Boolean(_) => Ok(()),
    }
}
/// Produces a rough byte-size estimate for the JSON form of `fm`.
///
/// The estimate starts at 2 and adds, per entry, the key length plus 3, the
/// value estimate from `estimate_value_size`, and 1 more. The constants
/// presumably stand for the surrounding braces, the key's quotes and colon,
/// and a separating comma — the result is a heuristic, not an exact size.
fn estimate_json_size(fm: &Frontmatter) -> usize {
    fm.0.iter().fold(2, |total, (key, value)| {
        total + key.len() + 3 + estimate_value_size(value) + 1
    })
}
/// Produces a rough byte-size estimate for the JSON form of a single `Value`.
///
/// Scalars use fixed guesses (null 4, boolean 5, number 8) or the string
/// length plus 2; arrays add 2 to the sum of their elements; objects defer to
/// `estimate_json_size`; tagged values add the tag length plus 2 to the
/// estimate of their payload.
fn estimate_value_size(value: &Value) -> usize {
    match value {
        Value::Object(obj) => estimate_json_size(obj),
        Value::Array(items) => {
            let mut total = 2;
            for item in items {
                total += estimate_value_size(item);
            }
            total
        }
        Value::Tagged(tag, inner) => tag.len() + 2 + estimate_value_size(inner),
        Value::String(text) => text.len() + 2,
        Value::Boolean(_) => 5,
        Value::Number(_) => 8,
        Value::Null => 4,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::f64::consts::PI;
/// Builds the shared fixture: a string, a number, a boolean and a three-element array.
fn create_test_frontmatter() -> Frontmatter {
    let entries = [
        ("title", Value::String("Test".to_string())),
        ("number", Value::Number(PI)),
        ("boolean", Value::Boolean(true)),
        (
            "array",
            Value::Array(vec![
                Value::Number(1.0),
                Value::Number(2.0),
                Value::Number(3.0),
            ]),
        ),
    ];

    let mut fm = Frontmatter::new();
    for (key, value) in entries {
        let _ = fm.insert(key.to_string(), value);
    }
    fm
}
mod parse_options_tests {
    use super::*;

    /// The default options must mirror the module constants and enable validation.
    #[test]
    fn test_parse_options_default() {
        let ParseOptions {
            max_depth,
            max_keys,
            validate,
        } = ParseOptions::default();
        assert_eq!(max_depth, MAX_NESTING_DEPTH);
        assert_eq!(max_keys, MAX_KEYS);
        assert!(validate);
    }
}
mod optimise_string_tests {
    use super::*;

    /// Short inputs are copied verbatim with a capacity equal to their length.
    #[test]
    fn test_optimise_string_short() {
        let input = "short";
        let copy = optimise_string(input);
        assert_eq!(copy, input);
        assert_eq!(copy.capacity(), input.len());
    }

    /// Inputs just over the threshold are copied verbatim with enough capacity.
    #[test]
    fn test_optimise_string_long() {
        let input = "a".repeat(SMALL_STRING_SIZE + 1);
        let copy = optimise_string(&input);
        assert_eq!(copy, input);
        assert!(copy.capacity() >= input.len());
    }
}
mod parsing_tests {
use super::*;
#[test]
fn test_parse_yaml() {
let yaml = "key: value";
let result = parse_yaml(yaml);
assert!(result.is_ok());
let fm = result.unwrap();
assert_eq!(
fm.0.get("key"),
Some(&Value::String("value".to_string()))
);
}
#[test]
fn test_parse_toml() {
let toml = "key = \"value\"";
let result = parse_toml(toml);
assert!(result.is_ok());
let fm = result.unwrap();
assert_eq!(
fm.0.get("key"),
Some(&Value::String("value".to_string()))
);
}
#[test]
fn test_parse_json() {
let json = r#"{"key": "value"}"#;
let result = parse_json(json);
assert!(result.is_ok());
let fm = result.unwrap();
assert_eq!(
fm.0.get("key"),
Some(&Value::String("value".to_string()))
);
}
#[test]
fn test_parse_with_options() {
let yaml = "key: value";
let result = parse_with_options(yaml, Format::Yaml, None);
assert!(result.is_ok());
let fm = result.unwrap();
assert_eq!(
fm.0.get("key"),
Some(&Value::String("value".to_string()))
);
}
#[test]
fn test_parse_with_invalid_format() {
let yaml = "key: value";
let result =
parse_with_options(yaml, Format::Unsupported, None);
assert!(matches!(result, Err(Error::ConversionError(_))));
}
}
mod serialization_tests {
    use super::*;

    /// YAML output mentions the title key and its value.
    #[test]
    fn test_to_yaml() {
        let yaml = to_yaml(&create_test_frontmatter()).unwrap();
        for needle in ["title:", "Test"] {
            assert!(yaml.contains(needle));
        }
    }

    /// TOML output contains the title assignment.
    #[test]
    fn test_to_toml() {
        let toml = to_toml(&create_test_frontmatter()).unwrap();
        assert!(toml.contains("title = \"Test\""));
    }

    /// Compact JSON output contains the title pair without extra whitespace.
    #[test]
    fn test_to_json_optimised() {
        let json = to_json_optimised(&create_test_frontmatter()).unwrap();
        assert!(json.contains("\"title\":\"Test\""));
    }

    /// `to_string` dispatches to the right serialiser for each format.
    #[test]
    fn test_to_string() {
        let fm = create_test_frontmatter();
        let expectations = [
            (Format::Yaml, "title: Test"),
            (Format::Toml, "title = \"Test\""),
            (Format::Json, "\"title\":\"Test\""),
        ];
        for (format, needle) in expectations {
            let rendered = to_string(&fm, format).unwrap();
            assert!(rendered.contains(needle));
        }
    }
}
mod validation_tests {
    use super::*;

    /// The shared fixture is well within the default limits.
    #[test]
    fn test_validate_frontmatter_valid() {
        let fm = create_test_frontmatter();
        let outcome = validate_frontmatter(&fm, MAX_NESTING_DEPTH, MAX_KEYS);
        assert!(outcome.is_ok());
    }

    /// One key more than the limit is reported as `ContentTooLarge`.
    #[test]
    fn test_validate_frontmatter_exceeds_keys() {
        let mut fm = Frontmatter::new();
        for index in 0..=MAX_KEYS {
            let _ = fm.insert(index.to_string(), Value::String("value".to_string()));
        }

        let outcome = validate_frontmatter(&fm, MAX_NESTING_DEPTH, MAX_KEYS);
        assert!(matches!(outcome, Err(Error::ContentTooLarge { .. })));
    }

    /// Objects nested one level past the limit are reported as `NestingTooDeep`.
    #[test]
    fn test_validate_frontmatter_exceeds_depth() {
        let deep = (0..=MAX_NESTING_DEPTH).fold(Value::Null, |inner, _| {
            Value::Object(Box::new(Frontmatter(
                [("nested".to_string(), inner)].into_iter().collect(),
            )))
        });

        let mut fm = Frontmatter::new();
        let _ = fm.insert("deep".to_string(), deep);

        let outcome = validate_frontmatter(&fm, MAX_NESTING_DEPTH, MAX_KEYS);
        assert!(matches!(outcome, Err(Error::NestingTooDeep { .. })));
    }
}
mod utility_tests {
    use super::*;

    /// The size estimate for the fixture is at least the length of its JSON output.
    #[test]
    fn test_estimate_json_size() {
        let fm = create_test_frontmatter();
        let estimated_size = estimate_json_size(&fm);
        let actual_json = to_string(&fm, Format::Json).unwrap();
        assert!(estimated_size >= actual_json.len());
    }

    /// A shallow object passes the depth check.
    #[test]
    fn test_check_depth_valid() {
        let value = Value::Object(Box::new(create_test_frontmatter()));
        assert!(check_depth(&value, 1, MAX_NESTING_DEPTH).is_ok());
    }

    /// Objects nested one level past the limit fail the depth check.
    #[test]
    fn test_check_depth_exceeds() {
        let mut current = Value::Null;
        for _ in 0..MAX_NESTING_DEPTH + 1 {
            current = Value::Object(Box::new(Frontmatter(
                [("nested".to_string(), current)]
                    .into_iter()
                    .collect(),
            )));
        }
        // The original line held a mangled token in place of `&current`.
        let result = check_depth(&current, 1, MAX_NESTING_DEPTH);
        assert!(matches!(
            result,
            Err(Error::NestingTooDeep { .. })
        ));
    }
}
}