#![allow(clippy::missing_errors_doc)]
use anyhow::{Context, Result};
use indexmap::IndexMap;
use serde_json::Value;
use serde_saphyr::{Budget, DuplicateKeyPolicy, Options, SerializerOptions};
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use super::FrontmatterError;
/// Returns early from the enclosing function with a `FrontmatterError`
/// wrapped in an `anyhow::Error`.
///
/// Takes the same arguments as `format!`; the formatted text becomes the
/// payload of the `FrontmatterError`.
macro_rules! parse_bail {
    ($($fmt_args:tt)*) => {{
        let message = format!($($fmt_args)*);
        return Err(anyhow::Error::new(FrontmatterError(message)));
    }};
}
/// Builds the `serde_saphyr` parser [`Options`] used for frontmatter in this module.
///
/// Starting from the library defaults, the following are overridden:
/// - the [`Budget`]: 10 000 events, depth 20, 5 000 nodes, 8 192 total scalar
///   bytes, a single document, and `max_aliases` / `max_anchors` set to 0;
/// - `duplicate_keys` is set to [`DuplicateKeyPolicy::Error`];
/// - `strict_booleans` is set to `true`.
pub fn hyalo_options() -> Options {
    let budget = Budget {
        max_events: 10_000,
        max_depth: 20,
        max_aliases: 0,
        max_anchors: 0,
        max_nodes: 5_000,
        max_total_scalar_bytes: 8192,
        max_documents: 1,
        ..Budget::default()
    };

    Options {
        budget: Some(budget),
        duplicate_keys: DuplicateKeyPolicy::Error,
        strict_booleans: true,
        ..Options::default()
    }
}
/// Builds the `serde_saphyr` [`SerializerOptions`] used when writing frontmatter.
///
/// Every field keeps its library default except `compact_list_indent`, which is
/// taken from the argument.
fn hyalo_serializer_options(compact_list_indent: bool) -> SerializerOptions {
    let defaults = SerializerOptions::default();
    SerializerOptions {
        compact_list_indent,
        ..defaults
    }
}
/// Heuristically detects the list indentation style of a YAML snippet.
///
/// The text is scanned line by line, ignoring blank lines and lines whose first
/// non-whitespace character is `#`. The decision is made at the first list item
/// (a line that, after leading whitespace, is `-` or starts with `- `):
///
/// - if the previous significant line was a key with no inline value (`key:`,
///   optionally followed by a `#` comment), the result is `true` when the list
///   item is indented no deeper than that key, and `false` otherwise;
/// - if there is no such key directly before it, the result is `true`.
///
/// Returns `false` when the snippet contains no list item at all.
///
/// The result is used as the `compact_list_indent` flag elsewhere in this module.
pub(super) fn detect_list_indent_style(yaml: &str) -> bool {
    /// Reports whether `text` (already stripped of leading whitespace) looks like
    /// `key:` with nothing but an optional `#` comment after the colon.
    fn is_valueless_key(text: &str) -> bool {
        match text.split_once(':') {
            Some((key, rest)) => {
                let key_ok = !key.is_empty() && !key.starts_with('-');
                // The colon must end the line or be followed by a space.
                let colon_ok = rest.is_empty() || rest.starts_with(' ');
                let value = rest.trim();
                key_ok && colon_ok && (value.is_empty() || value.starts_with('#'))
            }
            None => false,
        }
    }

    // Pair each significant line with its indent width (in bytes).
    let significant = yaml.lines().filter_map(|raw| {
        let text = raw.trim_start();
        if text.is_empty() || text.starts_with('#') {
            None
        } else {
            Some((raw.len() - text.len(), text))
        }
    });

    // Indent of the value-less key on the immediately preceding significant line.
    let mut pending_key_indent: Option<usize> = None;
    for (indent, text) in significant {
        if text == "-" || text.starts_with("- ") {
            return pending_key_indent.map_or(true, |key_indent| indent <= key_indent);
        }
        pending_key_indent = if is_valueless_key(text) {
            Some(indent)
        } else {
            None
        };
    }
    false
}
#[derive(Debug, Clone)]
#[allow(dead_code)] pub(crate) struct Document {
properties: IndexMap<String, Value>,
body: String,
compact_list_indent: bool,
}
#[allow(dead_code)] impl Document {
#[must_use]
pub fn properties(&self) -> &IndexMap<String, Value> {
&self.properties
}
#[must_use]
pub fn body(&self) -> &str {
&self.body
}
pub fn parse(content: &str) -> Result<Self> {
let (yaml_str, body) = extract_frontmatter(content)?;
let (properties, compact_list_indent) = match yaml_str {
Some(yaml) if !yaml.trim().is_empty() => {
let compact = detect_list_indent_style(yaml);
let props: IndexMap<String, Value> =
serde_saphyr::from_str_with_options(yaml, hyalo_options()).map_err(|e| {
anyhow::Error::new(FrontmatterError(format!(
"failed to parse YAML frontmatter: {e}"
)))
})?;
(props, compact)
}
_ => (IndexMap::new(), false),
};
Ok(Self {
properties,
body: body.to_owned(),
compact_list_indent,
})
}
pub fn serialize(&self) -> Result<String> {
let mut out = String::new();
if !self.properties.is_empty() {
out.push_str("---\n");
let yaml = serde_saphyr::to_string_with_options(
&self.properties,
hyalo_serializer_options(self.compact_list_indent),
)
.context("failed to serialize YAML")?;
out.push_str(&yaml);
if !yaml.ends_with('\n') {
out.push('\n');
}
out.push_str("---\n");
}
out.push_str(&self.body);
Ok(out)
}
#[must_use]
pub fn get_property(&self, name: &str) -> Option<&Value> {
self.properties.get(name)
}
pub fn set_property(&mut self, name: String, value: Value) {
self.properties.insert(name, value);
}
pub fn remove_property(&mut self, name: &str) -> Option<Value> {
self.properties.shift_remove(name)
}
}
/// Returns the part of `content` that follows the frontmatter block.
///
/// If `extract_frontmatter` fails, the error is dropped and the whole of
/// `content` is returned unchanged.
pub fn body_only(content: &str) -> &str {
    extract_frontmatter(content).map_or(content, |(_, body)| body)
}
pub fn read_frontmatter(path: &Path) -> Result<IndexMap<String, Value>> {
let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
let reader = BufReader::new(file);
read_frontmatter_from_reader(reader)
}
/// Replaces the frontmatter of the file at `path` with `props`, keeping the
/// body bytes as they are.
///
/// The steps are:
/// 1. locate the start of the body with `find_body_offset`;
/// 2. if that offset is non-zero, re-read the leading bytes and run
///    `detect_list_indent_style` on them so the new YAML is serialized with the
///    same `compact_list_indent` setting (otherwise `false` is used);
/// 3. read the body bytes from the offset to the end of the file;
/// 4. assemble the new contents — a `---` fenced YAML block when `props` is
///    non-empty, followed by the body — and pass them to
///    `crate::fs_util::atomic_write`.
///
/// When `props` is empty, no frontmatter block is written at all.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, seeked or read, if
/// `find_body_offset` fails, if YAML serialization fails, or if the final
/// write fails.
pub fn write_frontmatter(path: &Path, props: &IndexMap<String, Value>) -> Result<()> {
    let mut source =
        File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
    let body_start = find_body_offset(&mut source)?;

    let compact = if body_start == 0 {
        false
    } else {
        source
            .seek(SeekFrom::Start(0))
            .with_context(|| format!("failed to seek in {}", path.display()))?;
        #[allow(clippy::cast_possible_truncation)]
        let mut header = vec![0u8; body_start as usize];
        source
            .read_exact(&mut header)
            .with_context(|| format!("failed to read frontmatter of {}", path.display()))?;
        let header_text = String::from_utf8_lossy(&header);
        // Drop the opening fence (LF or CRLF) before inspecting the YAML lines.
        let yaml_text = match header_text.strip_prefix("---\n") {
            Some(rest) => rest,
            None => header_text
                .strip_prefix("---\r\n")
                .unwrap_or(&header_text),
        };
        detect_list_indent_style(yaml_text)
    };

    source
        .seek(SeekFrom::Start(body_start))
        .with_context(|| format!("failed to seek in {}", path.display()))?;
    let mut body = Vec::new();
    source
        .read_to_end(&mut body)
        .with_context(|| format!("failed to read body of {}", path.display()))?;
    // Close the read handle before the file is rewritten.
    drop(source);

    let mut rendered: Vec<u8> = Vec::new();
    if !props.is_empty() {
        rendered.extend_from_slice(b"---\n");
        let yaml =
            serde_saphyr::to_string_with_options(props, hyalo_serializer_options(compact))
                .context("failed to serialize YAML")?;
        rendered.extend_from_slice(yaml.as_bytes());
        // Make sure the closing fence starts on its own line.
        if !yaml.ends_with('\n') {
            rendered.push(b'\n');
        }
        rendered.extend_from_slice(b"---\n");
    }
    rendered.extend_from_slice(&body);

    crate::fs_util::atomic_write(path, &rendered)
        .with_context(|| format!("failed to write {}", path.display()))?;
    Ok(())
}
/// Finds the byte offset at which the body starts, i.e. the position just
/// after the closing `---` line of the frontmatter.
///
/// Returns `Ok(0)` when the file is empty or its first line (ignoring the
/// trailing `\n` / `\r`) is not exactly `---`. A closing line is any later line
/// that equals `---` after trimming surrounding whitespace.
///
/// The file is read through a `BufReader`; the returned value is the reader's
/// stream position.
///
/// # Errors
///
/// Returns a `FrontmatterError` when the end of the file is reached before a
/// closing line, or when more than `MAX_FRONTMATTER_LINES` lines or
/// `MAX_FRONTMATTER_BYTES` bytes of content precede it. I/O failures are
/// reported with context.
fn find_body_offset(file: &mut File) -> Result<u64> {
    const MAX_FRONTMATTER_LINES: usize = 200;
    const MAX_FRONTMATTER_BYTES: usize = 8 * 1024;

    let mut reader = BufReader::new(&mut *file);
    let mut buf = String::new();

    let first_len = reader.read_line(&mut buf).context("failed to read line")?;
    let has_opening_fence = first_len != 0 && buf.trim_end_matches(['\n', '\r']) == "---";
    if !has_opening_fence {
        return Ok(0);
    }

    let mut seen_lines: usize = 0;
    let mut seen_bytes: usize = 0;
    loop {
        buf.clear();
        let read = reader.read_line(&mut buf).context("failed to read line")?;
        if read == 0 {
            parse_bail!(
                "unclosed frontmatter: file starts with `---` but no closing `---` was found"
            );
        }
        // `trim` also removes the line terminator, so no separate strip is needed.
        if buf.trim() == "---" {
            break;
        }
        seen_lines += 1;
        seen_bytes += read;
        if seen_lines > MAX_FRONTMATTER_LINES || seen_bytes > MAX_FRONTMATTER_BYTES {
            parse_bail!(
                "frontmatter too large (no closing `---` found within {MAX_FRONTMATTER_LINES} lines / {MAX_FRONTMATTER_BYTES} bytes)"
            );
        }
    }

    reader
        .stream_position()
        .context("failed to get stream position")
}
pub fn skip_frontmatter<R: BufRead>(reader: &mut R, first_line: &str) -> Result<usize> {
const MAX_FRONTMATTER_LINES: usize = 200;
const MAX_FRONTMATTER_BYTES: usize = 8 * 1024;
if first_line.trim() != "---" {
return Ok(0);
}
let mut line_count = 1; let mut total_bytes = 0;
let mut buf = String::new();
loop {
buf.clear();
let n = reader.read_line(&mut buf).context("failed to read line")?;
if n == 0 {
parse_bail!(
"unclosed frontmatter: file starts with `---` but no closing `---` was found"
);
}
line_count += 1;
let trimmed = buf.trim_end_matches(['\n', '\r']);
if trimmed.trim() == "---" {
break;
}
total_bytes += n;
if line_count - 1 > MAX_FRONTMATTER_LINES || total_bytes > MAX_FRONTMATTER_BYTES {
parse_bail!(
"frontmatter too large (no closing `---` found within {MAX_FRONTMATTER_LINES} lines / {MAX_FRONTMATTER_BYTES} bytes)"
);
}
}
Ok(line_count)
}
pub(crate) fn read_frontmatter_from_reader<R: BufRead>(
reader: R,
) -> Result<IndexMap<String, Value>> {
const MAX_FRONTMATTER_LINES: usize = 200;
const MAX_FRONTMATTER_BYTES: usize = 8 * 1024;
let mut lines = reader.lines();
match lines.next() {
Some(Ok(line)) if line.trim() == "---" => {}
_ => return Ok(IndexMap::new()),
}
let mut yaml = String::new();
let mut line_count = 0;
let mut closed = false;
let mut has_content_lines = false;
for line in lines {
has_content_lines = true;
let line = line.context("failed to read line")?;
if line.trim() == "---" {
closed = true;
break;
}
line_count += 1;
if line_count > MAX_FRONTMATTER_LINES || yaml.len() + line.len() > MAX_FRONTMATTER_BYTES {
parse_bail!(
"frontmatter too large (no closing `---` found within {MAX_FRONTMATTER_LINES} lines / {MAX_FRONTMATTER_BYTES} bytes)"
);
}
yaml.push_str(&line);
yaml.push('\n');
}
if !closed {
if !has_content_lines {
return Ok(IndexMap::new());
}
parse_bail!("unclosed frontmatter: file starts with `---` but no closing `---` was found");
}
if yaml.trim().is_empty() {
return Ok(IndexMap::new());
}
serde_saphyr::from_str_with_options(&yaml, hyalo_options()).map_err(|e| {
anyhow::Error::new(FrontmatterError(format!(
"failed to parse YAML frontmatter: {e}"
)))
})
}
#[allow(dead_code)] fn extract_frontmatter(content: &str) -> Result<(Option<&str>, &str)> {
if !content.starts_with("---") {
return Ok((None, content));
}
let after_opening = &content[3..];
let after_opening = if let Some(rest) = after_opening.strip_prefix('\n') {
if rest.is_empty() {
return Ok((None, content));
}
rest
} else if let Some(rest) = after_opening.strip_prefix("\r\n") {
if rest.is_empty() {
return Ok((None, content));
}
rest
} else if after_opening.is_empty() {
return Ok((None, content));
} else {
return Ok((None, content));
};
if let Some(pos) = find_closing_delimiter(after_opening) {
let yaml = &after_opening[..pos];
let rest = &after_opening[pos + 3..];
let body = if let Some(stripped) = rest.strip_prefix('\n') {
stripped
} else if let Some(stripped) = rest.strip_prefix("\r\n") {
stripped
} else {
rest
};
Ok((Some(yaml), body))
} else {
parse_bail!("unclosed frontmatter: file starts with `---` but no closing `---` was found")
}
}
/// Finds the byte index of the first closing `---` fence in `s`.
///
/// A fence is a `---` that sits at the very start of `s` or directly after a
/// `\n`, and that is followed by the end of the string, a `\n`, or a `\r`.
/// Returns `None` when no such fence exists.
#[allow(dead_code)]
fn find_closing_delimiter(s: &str) -> Option<usize> {
    let bytes = s.as_bytes();
    // True when position `idx` is the end of the string or a line-break byte.
    let line_ends_at =
        |idx: usize| matches!(bytes.get(idx).copied(), None | Some(b'\n' | b'\r'));

    if s.starts_with("---") && line_ends_at(3) {
        return Some(0);
    }

    s.match_indices("\n---")
        // Skip the newline so the index points at the first dash.
        .map(|(newline_at, _)| newline_at + 1)
        .find(|&fence_at| line_ends_at(fence_at + 3))
}