1use serde::Serialize;
37use serde_json::Value as JsonValue;
38use serde_yml::Value as YamlValue;
39use std::{collections::HashMap, sync::Arc};
40use toml::Value as TomlValue;
41
42use crate::{error::Error, types::Frontmatter, Format, Value};
43
/// Length threshold, in bytes, at or below which `optimise_string` copies its
/// input with a plain `to_string()` call.
const SMALL_STRING_SIZE: usize = 24;
/// Default nesting-depth limit used by `ParseOptions::default()`.
const MAX_NESTING_DEPTH: usize = 32;
/// Default top-level key-count limit used by `ParseOptions::default()`.
const MAX_KEYS: usize = 1000;
48
/// Options controlling how front matter is validated after parsing.
///
/// Consumed by `parse_with_options`: when `validate` is `true`, the parsed
/// front matter is checked against `max_depth` and `max_keys`.
#[derive(Debug, Clone, Copy)]
pub struct ParseOptions {
    /// Maximum nesting depth passed to `validate_frontmatter`.
    pub max_depth: usize,
    /// Maximum number of top-level keys passed to `validate_frontmatter`.
    pub max_keys: usize,
    /// Whether `parse_with_options` validates the parsed front matter.
    pub validate: bool,
}
62
63impl Default for ParseOptions {
64 fn default() -> Self {
65 Self {
66 max_depth: MAX_NESTING_DEPTH,
67 max_keys: MAX_KEYS,
68 validate: true,
69 }
70 }
71}
72
73#[inline]
86fn optimise_string(s: &str) -> String {
87 if s.len() <= SMALL_STRING_SIZE {
88 s.to_string()
89 } else {
90 let mut string = String::with_capacity(s.len());
91 string.push_str(s);
92 string
93 }
94}
95
96pub fn parse_with_options(
119 raw_front_matter: &str,
120 format: Format,
121 options: Option<ParseOptions>,
122) -> Result<Frontmatter, Error> {
123 let options = options.unwrap_or_default();
124
125 if format == Format::Unsupported {
127 let err_msg = format!(
128 "Unsupported format: {:?}. Supported formats are YAML, TOML, and JSON.",
129 format
130 );
131 log::error!("{}", err_msg);
132 return Err(Error::ConversionError(err_msg));
133 }
134
135 let trimmed_content = raw_front_matter.trim();
137
138 match format {
140 Format::Yaml => {
141 if !trimmed_content.starts_with("---") {
142 log::debug!("YAML front matter validation: Content structure appears non-standard");
143 }
144 }
145 Format::Toml => {
146 if !trimmed_content.contains('=') {
147 return Err(Error::ConversionError(
148 "Format set to TOML but input does not contain '=' signs.".to_string(),
149 ));
150 }
151 }
152 Format::Json => {
153 if !trimmed_content.starts_with('{') {
154 return Err(Error::ConversionError(
155 "Format set to JSON but input does not start with '{'."
156 .to_string(),
157 ));
158 }
159 }
160 Format::Unsupported => unreachable!(), };
162
163 let front_matter = match format {
164 Format::Yaml => parse_yaml(trimmed_content).map_err(|e| {
165 log::error!("YAML parsing failed: {}", e);
166 e
167 })?,
168 Format::Toml => parse_toml(trimmed_content).map_err(|e| {
169 log::error!("TOML parsing failed: {}", e);
170 e
171 })?,
172 Format::Json => parse_json(trimmed_content).map_err(|e| {
173 log::error!("JSON parsing failed: {}", e);
174 e
175 })?,
176 Format::Unsupported => unreachable!(),
177 };
178
179 if options.validate {
181 log::debug!(
182 "Validating front matter: maximum allowed nesting depth is {}, maximum allowed number of keys is {}",
183 options.max_depth,
184 options.max_keys
185 );
186
187 validate_frontmatter(
188 &front_matter,
189 options.max_depth,
190 options.max_keys,
191 )
192 .map_err(|e| {
193 log::error!("Front matter validation failed: {}", e);
194 e
195 })?;
196 }
197
198 Ok(front_matter)
199}
200
201pub fn parse(
219 raw_front_matter: &str,
220 format: Format,
221) -> Result<Frontmatter, Error> {
222 parse_with_options(raw_front_matter, format, None)
223}
224
225pub fn to_string(
242 front_matter: &Frontmatter,
243 format: Format,
244) -> Result<String, Error> {
245 match format {
246 Format::Yaml => to_yaml(front_matter),
247 Format::Toml => to_toml(front_matter),
248 Format::Json => to_json_optimised(front_matter),
249 Format::Unsupported => Err(Error::ConversionError(
250 "Unsupported format".to_string(),
251 )),
252 }
253}
254
255fn parse_yaml(raw: &str) -> Result<Frontmatter, Error> {
268 let yaml_value: YamlValue = serde_yml::from_str(raw)
270 .map_err(|e| Error::YamlParseError { source: e.into() })?;
271
272 let capacity =
274 yaml_value.as_mapping().map_or(0, serde_yml::Mapping::len);
275 let mut front_matter =
276 Frontmatter(HashMap::with_capacity(capacity));
277
278 if let YamlValue::Mapping(mapping) = yaml_value {
280 for (key, value) in mapping {
281 if let YamlValue::String(k) = key {
282 let _ = front_matter.insert(k, yaml_to_value(&value));
283 } else {
284 log::warn!("Warning: Non-string key ignored in YAML front matter");
286 }
287 }
288 } else {
289 return Err(Error::ParseError(
290 "YAML front matter is not a valid mapping".to_string(),
291 ));
292 }
293
294 Ok(front_matter)
295}
296
297fn yaml_to_value(yaml: &YamlValue) -> Value {
299 match yaml {
300 YamlValue::Null => Value::Null,
301 YamlValue::Bool(b) => Value::Boolean(*b),
302 YamlValue::Number(n) => {
303 n.as_i64()
304 .map_or_else(
305 || {
306 n.as_f64().map_or_else(
307 || {
308 log::warn!(
309 "Invalid or unsupported number encountered in YAML: {:?}",
310 n
311 );
312 Value::Number(0.0) },
314 Value::Number,
315 )
316 },
317 |i| {
318 if i.abs() < (1_i64 << 52) {
319 Value::Number(i as f64)
320 } else {
321 log::warn!(
322 "Integer {} exceeds precision of f64. Defaulting to 0.0",
323 i
324 );
325 Value::Number(0.0) }
327 },
328 )
329 }
330 YamlValue::String(s) => Value::String(optimise_string(s)),
331 YamlValue::Sequence(seq) => {
332 let mut vec = Vec::with_capacity(seq.len());
333 vec.extend(seq.iter().map(yaml_to_value));
334 Value::Array(vec)
335 }
336 YamlValue::Mapping(map) => {
337 let mut result =
338 Frontmatter(HashMap::with_capacity(map.len()));
339 for (k, v) in map {
340 if let YamlValue::String(key) = k {
341 let _ = result
342 .0
343 .insert(optimise_string(key), yaml_to_value(v));
344 } else {
345 log::warn!(
346 "Non-string key in YAML mapping ignored: {:?}",
347 k
348 );
349 }
350 }
351 Value::Object(Box::new(result))
352 }
353 YamlValue::Tagged(tagged) => Value::Tagged(
354 optimise_string(&tagged.tag.to_string()),
355 Box::new(yaml_to_value(&tagged.value)),
356 ),
357 }
358}
359
360fn to_yaml(front_matter: &Frontmatter) -> Result<String, Error> {
370 serde_yml::to_string(&front_matter.0)
371 .map_err(|e| Error::ConversionError(e.to_string()))
372}
373
374fn parse_toml(raw: &str) -> Result<Frontmatter, Error> {
387 let toml_value: TomlValue =
388 raw.parse().map_err(Error::TomlParseError)?;
389
390 let capacity = match &toml_value {
391 TomlValue::Table(table) => table.len(),
392 _ => 0,
393 };
394
395 let mut front_matter =
396 Frontmatter(HashMap::with_capacity(capacity));
397
398 if let TomlValue::Table(table) = toml_value {
399 for (key, value) in table {
400 let _ = front_matter.0.insert(key, toml_to_value(&value));
401 }
402 }
403
404 Ok(front_matter)
405}
406
407fn toml_to_value(toml: &TomlValue) -> Value {
409 match toml {
410 TomlValue::String(s) => Value::String(optimise_string(s)),
411 TomlValue::Integer(i) => Value::Number(*i as f64),
412 TomlValue::Float(f) => Value::Number(*f),
413 TomlValue::Boolean(b) => Value::Boolean(*b),
414 TomlValue::Array(arr) => {
415 let mut vec = Vec::with_capacity(arr.len());
416 vec.extend(arr.iter().map(toml_to_value));
417 Value::Array(vec)
418 }
419 TomlValue::Table(table) => {
420 let mut result =
421 Frontmatter(HashMap::with_capacity(table.len()));
422 for (k, v) in table {
423 let _ = result
424 .0
425 .insert(optimise_string(k), toml_to_value(v));
426 }
427 Value::Object(Box::new(result))
428 }
429 TomlValue::Datetime(dt) => Value::String(dt.to_string()),
430 }
431}
432
433fn to_toml(front_matter: &Frontmatter) -> Result<String, Error> {
443 toml::to_string(&front_matter.0)
444 .map_err(|e| Error::ConversionError(e.to_string()))
445}
446
447fn parse_json(raw: &str) -> Result<Frontmatter, Error> {
460 let json_value: JsonValue = serde_json::from_str(raw)
461 .map_err(|e| Error::JsonParseError(Arc::new(e)))?;
462
463 let capacity = match &json_value {
464 JsonValue::Object(obj) => obj.len(),
465 _ => 0,
466 };
467
468 let mut front_matter =
469 Frontmatter(HashMap::with_capacity(capacity));
470
471 if let JsonValue::Object(obj) = json_value {
472 for (key, value) in obj {
473 let _ = front_matter.0.insert(key, json_to_value(&value));
474 }
475 }
476
477 Ok(front_matter)
478}
479
480fn json_to_value(json: &JsonValue) -> Value {
482 match json {
483 JsonValue::Null => Value::Null,
484 JsonValue::Bool(b) => Value::Boolean(*b),
485 JsonValue::Number(n) => n.as_i64().map_or_else(
486 || {
487 if let Some(f) = n.as_f64() {
488 Value::Number(f)
489 } else {
490 Value::Number(0.0)
491 }
492 },
493 |i| Value::Number(i as f64),
494 ),
495 JsonValue::String(s) => Value::String(optimise_string(s)),
496 JsonValue::Array(arr) => {
497 let mut vec = Vec::with_capacity(arr.len());
498 vec.extend(arr.iter().map(json_to_value));
499 Value::Array(vec)
500 }
501 JsonValue::Object(obj) => {
502 let mut result =
503 Frontmatter(HashMap::with_capacity(obj.len()));
504 for (k, v) in obj {
505 let _ = result
506 .0
507 .insert(optimise_string(k), json_to_value(v));
508 }
509 Value::Object(Box::new(result))
510 }
511 }
512}
513
514fn to_json_optimised(
524 front_matter: &Frontmatter,
525) -> Result<String, Error> {
526 let estimated_size = estimate_json_size(front_matter);
527 let buf = Vec::with_capacity(estimated_size);
528 let formatter = serde_json::ser::CompactFormatter;
529 let mut ser =
530 serde_json::Serializer::with_formatter(buf, formatter);
531
532 front_matter
533 .0
534 .serialize(&mut ser)
535 .map_err(|e| Error::ConversionError(e.to_string()))?;
536
537 String::from_utf8(ser.into_inner())
538 .map_err(|e| Error::ConversionError(e.to_string()))
539}
540
541pub fn validate_frontmatter(
567 fm: &Frontmatter,
568 max_depth: usize,
569 max_keys: usize,
570) -> Result<(), Error> {
571 if fm.0.len() > max_keys {
572 return Err(Error::ContentTooLarge {
573 size: fm.0.len(),
574 max: max_keys,
575 });
576 }
577
578 for value in fm.0.values() {
580 check_depth(value, 1, max_depth)?;
581 }
582
583 Ok(())
584}
585
586fn check_depth(
598 value: &Value,
599 current_depth: usize,
600 max_depth: usize,
601) -> Result<(), Error> {
602 if current_depth > max_depth {
603 return Err(Error::NestingTooDeep {
604 depth: current_depth,
605 max: max_depth,
606 });
607 }
608
609 match value {
610 Value::Array(arr) => {
611 for item in arr {
612 check_depth(item, current_depth + 1, max_depth)?;
613 }
614 }
615 Value::Object(obj) => {
616 for v in obj.0.values() {
617 check_depth(v, current_depth + 1, max_depth)?;
618 }
619 }
620 _ => {}
621 }
622
623 Ok(())
624}
625
626fn estimate_json_size(fm: &Frontmatter) -> usize {
638 let mut size = 2; for (k, v) in &fm.0 {
640 size += k.len() + 3; size += estimate_value_size(v);
642 size += 1; }
644 size
645}
646
647fn estimate_value_size(value: &Value) -> usize {
657 match value {
658 Value::Null => 4, Value::String(s) => s.len() + 2, Value::Number(_) => 8, Value::Boolean(_) => 5, Value::Array(arr) => {
663 2 + arr.iter().map(estimate_value_size).sum::<usize>() }
665 Value::Object(obj) => estimate_json_size(obj),
666 Value::Tagged(tag, val) => {
667 tag.len() + 2 + estimate_value_size(val)
668 }
669 }
670}
671
/// Unit tests for parsing, serialisation, validation and the size/depth helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Builds a front matter with one string, number, boolean and array entry.
    fn create_test_frontmatter() -> Frontmatter {
        let mut fm = Frontmatter::new();
        let _ = fm.insert(
            "title".to_string(),
            Value::String("Test".to_string()),
        );
        let _ = fm.insert("number".to_string(), Value::Number(PI));
        let _ = fm.insert("boolean".to_string(), Value::Boolean(true));
        let _ = fm.insert(
            "array".to_string(),
            Value::Array(vec![
                Value::Number(1.0),
                Value::Number(2.0),
                Value::Number(3.0),
            ]),
        );
        fm
    }

    /// Tests for `ParseOptions`.
    mod parse_options_tests {
        use super::*;

        #[test]
        fn test_parse_options_default() {
            let default_options = ParseOptions::default();
            assert_eq!(default_options.max_depth, MAX_NESTING_DEPTH);
            assert_eq!(default_options.max_keys, MAX_KEYS);
            assert!(default_options.validate);
        }
    }

    /// Tests for `optimise_string`.
    mod optimise_string_tests {
        use super::*;

        #[test]
        fn test_optimise_string_short() {
            let short_string = "short";
            let optimised = optimise_string(short_string);
            assert_eq!(optimised, short_string);
            assert_eq!(optimised.capacity(), short_string.len());
        }

        #[test]
        fn test_optimise_string_long() {
            let long_string = "a".repeat(SMALL_STRING_SIZE + 1);
            let optimised = optimise_string(&long_string);
            assert_eq!(optimised, long_string);
            assert!(optimised.capacity() >= long_string.len());
        }
    }

    /// Tests for the format-specific parsers and `parse_with_options`.
    mod parsing_tests {
        use super::*;

        #[test]
        fn test_parse_yaml() {
            let yaml = "key: value";
            let result = parse_yaml(yaml);
            assert!(result.is_ok());
            let fm = result.unwrap();
            assert_eq!(
                fm.0.get("key"),
                Some(&Value::String("value".to_string()))
            );
        }

        #[test]
        fn test_parse_toml() {
            let toml = "key = \"value\"";
            let result = parse_toml(toml);
            assert!(result.is_ok());
            let fm = result.unwrap();
            assert_eq!(
                fm.0.get("key"),
                Some(&Value::String("value".to_string()))
            );
        }

        #[test]
        fn test_parse_json() {
            let json = r#"{"key": "value"}"#;
            let result = parse_json(json);
            assert!(result.is_ok());
            let fm = result.unwrap();
            assert_eq!(
                fm.0.get("key"),
                Some(&Value::String("value".to_string()))
            );
        }

        #[test]
        fn test_parse_with_options() {
            let yaml = "key: value";
            let result = parse_with_options(yaml, Format::Yaml, None);
            assert!(result.is_ok());
            let fm = result.unwrap();
            assert_eq!(
                fm.0.get("key"),
                Some(&Value::String("value".to_string()))
            );
        }

        #[test]
        fn test_parse_with_invalid_format() {
            let yaml = "key: value";
            let result =
                parse_with_options(yaml, Format::Unsupported, None);
            assert!(matches!(result, Err(Error::ConversionError(_))));
        }
    }

    /// Tests for the serialisers and `to_string`.
    mod serialization_tests {
        use super::*;

        #[test]
        fn test_to_yaml() {
            let fm = create_test_frontmatter();
            let yaml = to_yaml(&fm).unwrap();
            assert!(yaml.contains("title:"));
            assert!(yaml.contains("Test"));
        }

        #[test]
        fn test_to_toml() {
            let fm = create_test_frontmatter();
            let toml = to_toml(&fm).unwrap();
            assert!(toml.contains("title = \"Test\""));
        }

        #[test]
        fn test_to_json_optimised() {
            let fm = create_test_frontmatter();
            let json = to_json_optimised(&fm).unwrap();
            assert!(json.contains("\"title\":\"Test\""));
        }

        #[test]
        fn test_to_string() {
            let fm = create_test_frontmatter();

            let yaml = to_string(&fm, Format::Yaml).unwrap();
            assert!(yaml.contains("title: Test"));

            let toml = to_string(&fm, Format::Toml).unwrap();
            assert!(toml.contains("title = \"Test\""));

            let json = to_string(&fm, Format::Json).unwrap();
            assert!(json.contains("\"title\":\"Test\""));
        }
    }

    /// Tests for `validate_frontmatter`.
    mod validation_tests {
        use super::*;

        #[test]
        fn test_validate_frontmatter_valid() {
            let fm = create_test_frontmatter();
            assert!(validate_frontmatter(
                &fm,
                MAX_NESTING_DEPTH,
                MAX_KEYS
            )
            .is_ok());
        }

        #[test]
        fn test_validate_frontmatter_exceeds_keys() {
            let mut fm = Frontmatter::new();
            for i in 0..MAX_KEYS + 1 {
                let _ = fm.insert(
                    i.to_string(),
                    Value::String("value".to_string()),
                );
            }
            let result =
                validate_frontmatter(&fm, MAX_NESTING_DEPTH, MAX_KEYS);
            assert!(matches!(
                result,
                Err(Error::ContentTooLarge { .. })
            ));
        }

        #[test]
        fn test_validate_frontmatter_exceeds_depth() {
            let mut current = Value::Null;
            for _ in 0..MAX_NESTING_DEPTH + 1 {
                current = Value::Object(Box::new(Frontmatter(
                    [("nested".to_string(), current)]
                        .into_iter()
                        .collect(),
                )));
            }
            let mut fm = Frontmatter::new();
            let _ = fm.insert("deep".to_string(), current);
            let result =
                validate_frontmatter(&fm, MAX_NESTING_DEPTH, MAX_KEYS);
            assert!(matches!(
                result,
                Err(Error::NestingTooDeep { .. })
            ));
        }
    }

    /// Tests for `estimate_json_size` and `check_depth`.
    mod utility_tests {
        use super::*;

        #[test]
        fn test_estimate_json_size() {
            let fm = create_test_frontmatter();
            let estimated_size = estimate_json_size(&fm);
            let actual_json = to_string(&fm, Format::Json).unwrap();
            assert!(estimated_size >= actual_json.len());
        }

        #[test]
        fn test_check_depth_valid() {
            let value =
                Value::Object(Box::new(create_test_frontmatter()));
            assert!(check_depth(&value, 1, MAX_NESTING_DEPTH).is_ok());
        }

        #[test]
        fn test_check_depth_exceeds() {
            let mut current = Value::Null;
            for _ in 0..MAX_NESTING_DEPTH + 1 {
                current = Value::Object(Box::new(Frontmatter(
                    [("nested".to_string(), current)]
                        .into_iter()
                        .collect(),
                )));
            }
            // Pass the nested value by reference.
            let result = check_depth(&current, 1, MAX_NESTING_DEPTH);
            assert!(matches!(
                result,
                Err(Error::NestingTooDeep { .. })
            ));
        }
    }
}