use std::collections::{BTreeMap, HashSet};
use super::links::{extract_inline_tags, extract_wikilinks};
use super::{
FRONTMATTER_DELIM_START, FrontmatterExtract, FrontmatterValue, MIN_QUOTED_LENGTH,
normalize_keyword,
};
/// Parses a note's text into its frontmatter map and the metadata derived from it.
///
/// The leading frontmatter section is split off by `extract_frontmatter_yaml` and parsed as a
/// YAML mapping: first in the form produced by `normalize_obsidian_scalars`, and, if that fails,
/// exactly as written. When a non-empty frontmatter section fails both attempts, the whole of
/// `content` is kept as the body and `frontmatter_raw` is left empty.
///
/// Aliases are read from the converted frontmatter, tags combine the frontmatter tags with the
/// inline tags found in the body, and links are whatever `extract_wikilinks` finds in the body.
#[must_use]
pub fn parse_frontmatter(content: &str) -> FrontmatterExtract {
    type YamlMap = BTreeMap<String, serde_yaml_ng::Value>;

    let (split_body, split_raw) = extract_frontmatter_yaml(content);
    let normalized = normalize_obsidian_scalars(&split_raw);

    // Prefer the normalized text; fall back to the untouched text only if that fails to parse.
    let parsed_frontmatter: Option<YamlMap> = serde_yaml_ng::from_str::<YamlMap>(&normalized)
        .or_else(|_| serde_yaml_ng::from_str::<YamlMap>(&split_raw))
        .ok();

    let frontmatter: BTreeMap<String, FrontmatterValue> = parsed_frontmatter
        .iter()
        .flatten()
        .map(|(key, value)| (key.clone(), serde_value_to_fm(value.clone())))
        .collect();

    // A frontmatter section that exists but cannot be parsed is treated as ordinary body text.
    let unparsable = !split_raw.is_empty() && parsed_frontmatter.is_none();
    let (body, frontmatter_raw) = if unparsable {
        (content.to_string(), String::new())
    } else {
        (split_body, split_raw)
    };

    let aliases = extract_aliases(&frontmatter);
    let (fm_tags, inline_tags) = extract_tags(&frontmatter, parsed_frontmatter.as_ref(), &body);
    let tags = normalize_unique_list(fm_tags, inline_tags);
    let links = extract_wikilinks(&body);

    FrontmatterExtract {
        body,
        frontmatter,
        frontmatter_raw,
        aliases,
        tags,
        links,
    }
}
/// Splits `content` into `(body, raw_frontmatter)`.
///
/// Frontmatter is recognised only when `content` starts with a `---` line (LF or CRLF) and
/// `find_end_marker` locates a closing marker line afterwards. Otherwise the whole input is
/// returned as the body together with an empty frontmatter string.
///
/// `raw_frontmatter` is everything between the opening line and the start of the closing marker
/// line. The body starts immediately after the closing marker characters, so it still contains
/// the remainder of that line (normally just the line break).
fn extract_frontmatter_yaml(content: &str) -> (String, String) {
    let Some(after_start) = content
        .strip_prefix("---\n")
        .or_else(|| content.strip_prefix("---\r\n"))
    else {
        return (content.to_string(), String::new());
    };
    let Some(end) = find_end_marker(after_start) else {
        return (content.to_string(), String::new());
    };

    let (raw, from_marker) = after_start.split_at(end);

    // `find_end_marker` matches the marker after trimming the line, so the marker itself may be
    // preceded by whitespace. Skip that indentation before skipping the marker; otherwise part
    // of the marker would leak into the body.
    let indent = from_marker.len() - from_marker.trim_start().len();

    // NOTE(review): assumes `FRONTMATTER_DELIM_START` has the same byte length as the closing
    // marker (`---` / `...`); it is defined outside this block. The checked slice keeps this
    // panic-free even if that assumption does not hold.
    let body = from_marker
        .get(indent + FRONTMATTER_DELIM_START.len()..)
        .unwrap_or("");

    (body.to_string(), raw.to_string())
}
/// Finds the line that closes a frontmatter section.
///
/// Returns the byte offset, within `s`, of the start of the first line whose trimmed content is
/// `---` or `...`, or `None` when no such line exists.
///
/// The offset is accumulated from the real length of every preceding line including its
/// terminator, so it stays correct for CRLF input as well as LF input.
fn find_end_marker(s: &str) -> Option<usize> {
    let mut offset = 0;
    // `split_inclusive` keeps each line's terminator, so `line.len()` is the exact number of
    // bytes the line occupies in `s`.
    for line in s.split_inclusive('\n') {
        let trimmed = line.trim();
        if trimmed == "---" || trimmed == "..." {
            return Some(offset);
        }
        offset += line.len();
    }
    None
}
/// Rewrites `key: value` lines whose value would otherwise confuse the YAML parser.
///
/// Each line is inspected on its own. When the text before the first colon contains no space and
/// `needs_scalar_quoting` accepts the value, the line is re-emitted as `key: "value"` with the
/// value passed through `escape_double_quoted`. The line's leading indentation is preserved.
/// Every other line is copied through unchanged. Lines are re-joined with `\n`.
fn normalize_obsidian_scalars(raw: &str) -> String {
    let mut out = Vec::new();
    for line in raw.lines() {
        let trimmed = line.trim_start();
        // Width of the leading whitespace; needed to translate offsets in `trimmed` back to
        // offsets in `line`.
        let indent_len = line.len() - trimmed.len();

        let Some(colon_pos) = trimmed.find(':') else {
            out.push(line.to_string());
            continue;
        };

        let key = &trimmed[..colon_pos];
        if key.contains(' ') {
            // Not a plain key (for example a list item or free text containing a colon).
            out.push(line.to_string());
            continue;
        }

        let value = &trimmed[colon_pos + 1..];
        let value = value.strip_prefix(' ').unwrap_or(value);
        if needs_scalar_quoting(value) {
            let escaped = escape_double_quoted(value);
            // `colon_pos` is relative to `trimmed`, so the indentation has to be added before
            // slicing `line`; otherwise indented keys are cut short.
            let key_with_indent = line[..indent_len + colon_pos].trim_end();
            out.push(format!("{}: \"{}\"", key_with_indent, escaped));
        } else {
            out.push(line.to_string());
        }
    }
    out.join("\n")
}
/// Decides whether a frontmatter value has to be wrapped in double quotes.
///
/// Returns `true` only for a value that contains a colon and is none of the following after
/// trimming: empty, already wrapped in matching single or double quotes, starting with one of
/// `[`, `{`, `|`, `>`, `&`, `*`, or (case-insensitively) one of the words `true`, `false`,
/// `yes`, `no`, `on`, `off`, `null`, `~`.
fn needs_scalar_quoting(value: &str) -> bool {
    const YAML_KEYWORDS: [&str; 8] = ["true", "false", "yes", "no", "on", "off", "null", "~"];

    let scalar = value.trim();
    let wrapped_in = |quote: char| scalar.starts_with(quote) && scalar.ends_with(quote);

    let already_quoted = wrapped_in('"') || wrapped_in('\'');
    // Leading characters that introduce YAML flow collections, block scalars, anchors and aliases.
    let yaml_syntax = scalar.starts_with(|c: char| matches!(c, '[' | '{' | '|' | '>' | '&' | '*'));
    let is_keyword = YAML_KEYWORDS.contains(&scalar.to_lowercase().as_str());

    let left_alone = scalar.is_empty() || already_quoted || yaml_syntax || is_keyword;
    !left_alone && scalar.contains(':')
}
/// Trims `value` and escapes it for use inside a double-quoted string.
///
/// Every backslash and every double quote in the trimmed text is prefixed with a backslash; all
/// other characters are copied unchanged.
fn escape_double_quoted(value: &str) -> String {
    let source = value.trim();
    let mut escaped = String::with_capacity(source.len());
    for ch in source.chars() {
        if ch == '\\' || ch == '"' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Converts a parsed YAML value into the crate's `FrontmatterValue`.
///
/// * Strings become `Date` when `is_date_value` accepts them, otherwise `String`.
/// * Numbers are stored as `f64`; a number with no `f64` representation becomes `0.0`.
/// * Booleans map to `Boolean`.
/// * Null and tagged values become an empty `String`.
/// * Sequences, and the values of mappings (keys are dropped), are flattened into a `List`
///   by `flatten_list_value`.
fn serde_value_to_fm(value: serde_yaml_ng::Value) -> FrontmatterValue {
    match value {
        serde_yaml_ng::Value::Null | serde_yaml_ng::Value::Tagged(_) => {
            FrontmatterValue::String(String::new())
        }
        serde_yaml_ng::Value::Bool(flag) => FrontmatterValue::Boolean(flag),
        serde_yaml_ng::Value::Number(number) => {
            FrontmatterValue::Number(number.as_f64().unwrap_or(0.0))
        }
        serde_yaml_ng::Value::String(text) if is_date_value(&text) => FrontmatterValue::Date(text),
        serde_yaml_ng::Value::String(text) => FrontmatterValue::String(text),
        serde_yaml_ng::Value::Sequence(items) => {
            FrontmatterValue::List(flatten_list_value(&items, true))
        }
        serde_yaml_ng::Value::Mapping(entries) => {
            // Only the mapping's values are kept.
            let values: Vec<serde_yaml_ng::Value> =
                entries.iter().map(|(_, item)| item.clone()).collect();
            FrontmatterValue::List(flatten_list_value(&values, true))
        }
    }
}
/// Reports whether `value` parses either as an RFC 3339 timestamp or as a plain
/// `[year]-[month]-[day]` calendar date.
fn is_date_value(value: &str) -> bool {
    let calendar_date = time::macros::format_description!("[year]-[month]-[day]");

    if time::OffsetDateTime::parse(value, &time::format_description::well_known::Rfc3339).is_ok() {
        return true;
    }
    time::Date::parse(value, calendar_date).is_ok()
}
/// Flattens a slice of YAML values into a flat list of strings.
///
/// * Null and tagged values contribute nothing.
/// * Booleans and numbers contribute their `to_string()` form.
/// * Sequences are flattened recursively; mappings contribute their flattened values only.
/// * Strings are trimmed and skipped when empty. A string that starts with `{` or `[`, or is
///   wrapped in double quotes, is first re-parsed as YAML; if that succeeds the parsed value is
///   flattened with `in_sequence = false`. Otherwise the string is pushed as a single item with
///   its outer quotes stripped when `in_sequence` is true, or split by `split_list_text` when
///   it is false.
fn flatten_list_value(values: &[serde_yaml_ng::Value], in_sequence: bool) -> Vec<String> {
    let mut flat = Vec::new();
    for entry in values {
        match entry {
            serde_yaml_ng::Value::Null | serde_yaml_ng::Value::Tagged(_) => {}
            serde_yaml_ng::Value::Bool(flag) => flat.push(flag.to_string()),
            serde_yaml_ng::Value::Number(number) => flat.push(number.to_string()),
            serde_yaml_ng::Value::Sequence(items) => {
                flat.extend(flatten_list_value(items, true));
            }
            serde_yaml_ng::Value::Mapping(entries) => {
                let map_values: Vec<serde_yaml_ng::Value> =
                    entries.iter().map(|(_, item)| item.clone()).collect();
                flat.extend(flatten_list_value(&map_values, true));
            }
            serde_yaml_ng::Value::String(text) => {
                let text = text.trim();
                if text.is_empty() {
                    continue;
                }

                // Text that looks like embedded YAML gets one chance to be parsed as such.
                let looks_structured = text.starts_with('{')
                    || text.starts_with('[')
                    || (text.starts_with('"') && text.ends_with('"'));
                if looks_structured {
                    if let Ok(nested) = serde_yaml_ng::from_str::<serde_yaml_ng::Value>(text) {
                        flat.extend(flatten_list_value(&[nested], false));
                        continue;
                    }
                }

                if in_sequence {
                    flat.push(strip_outer_quotes(text));
                } else {
                    flat.extend(split_list_text(text));
                }
            }
        }
    }
    flat
}
/// Splits `value` on commas and newlines into a list of items.
///
/// Each piece is trimmed and passed through `strip_outer_quotes`; pieces that end up empty are
/// dropped.
fn split_list_text(value: &str) -> Vec<String> {
    let mut items = Vec::new();
    for piece in value.split(|c: char| c == ',' || c == '\n') {
        let cleaned = strip_outer_quotes(piece.trim());
        if !cleaned.is_empty() {
            items.push(cleaned);
        }
    }
    items
}
/// Trims `value` and removes one pair of matching outer quotes, if present.
///
/// Text shorter than `MIN_QUOTED_LENGTH` bytes is returned trimmed but otherwise untouched. For
/// longer text, a leading and trailing `"` (or a leading and trailing `'`) are removed together;
/// mismatched or missing quotes leave the trimmed text as it is.
fn strip_outer_quotes(value: &str) -> String {
    let text = value.trim();
    if text.len() < MIN_QUOTED_LENGTH {
        return text.to_string();
    }

    let unquoted = text
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            text.strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        });

    unquoted.unwrap_or(text).to_string()
}
/// Collects note aliases from the `aliases` and `alias` frontmatter keys, in that order.
///
/// A `List` value contributes its items as they are; a `String` value is split with
/// `split_list_text`. Other value kinds are ignored. The combined list is passed through
/// `normalize_unique_list` before being returned.
fn extract_aliases(frontmatter: &BTreeMap<String, FrontmatterValue>) -> Vec<String> {
    let mut collected: Vec<String> = Vec::new();
    for key in ["aliases", "alias"].iter() {
        match frontmatter.get(*key) {
            Some(FrontmatterValue::List(entries)) => collected.extend(entries.iter().cloned()),
            Some(FrontmatterValue::String(text)) => collected.extend(split_list_text(text)),
            _ => {}
        }
    }
    normalize_unique_list(collected, Vec::new())
}
/// Gathers tags from the frontmatter and from the body text.
///
/// Returns `(frontmatter_tags, inline_tags)`. Frontmatter tags are read from the `tags` and
/// `tag` keys, in that order. When the parsed YAML map is available its values are handed to
/// `extract_string_tags_from_yaml`; otherwise the converted `frontmatter` map is used, taking
/// `List` items as they are and splitting `String` values with `split_list_text`. Inline tags
/// are whatever `extract_inline_tags` returns for `body`.
fn extract_tags(
    frontmatter: &BTreeMap<String, FrontmatterValue>,
    parsed_frontmatter: Option<&BTreeMap<String, serde_yaml_ng::Value>>,
    body: &str,
) -> (Vec<String>, Vec<String>) {
    const TAG_KEYS: [&str; 2] = ["tags", "tag"];

    let fm_tags: Vec<String> = match parsed_frontmatter {
        Some(parsed) => TAG_KEYS
            .iter()
            .filter_map(|key| parsed.get(*key))
            .flat_map(|value| extract_string_tags_from_yaml(value))
            .collect(),
        None => {
            let mut collected = Vec::new();
            for key in TAG_KEYS.iter() {
                match frontmatter.get(*key) {
                    Some(FrontmatterValue::List(entries)) => {
                        collected.extend(entries.iter().cloned());
                    }
                    Some(FrontmatterValue::String(text)) => {
                        collected.extend(split_list_text(text));
                    }
                    _ => {}
                }
            }
            collected
        }
    };

    (fm_tags, extract_inline_tags(body))
}
/// Extracts tag strings from a YAML `tags` value.
///
/// A string value is split with `split_list_text`. For a sequence, every string element is
/// split the same way and non-string elements are skipped. Any other kind of value yields an
/// empty list.
fn extract_string_tags_from_yaml(value: &serde_yaml_ng::Value) -> Vec<String> {
    let mut tags = Vec::new();
    match value {
        serde_yaml_ng::Value::String(text) => tags.extend(split_list_text(text)),
        serde_yaml_ng::Value::Sequence(items) => {
            for item in items {
                if let serde_yaml_ng::Value::String(text) = item {
                    tags.extend(split_list_text(text));
                }
            }
        }
        _ => {}
    }
    tags
}
/// Merges two lists into one, dropping blanks and duplicates.
///
/// Items from `fm_tags` come first, followed by `inline_tags`. Two items are duplicates when
/// `normalize_keyword` gives them the same key; only the first is kept. Items whose key is
/// empty are dropped. Kept items are returned trimmed, in their original spelling.
fn normalize_unique_list(fm_tags: Vec<String>, inline_tags: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    fm_tags
        .into_iter()
        .chain(inline_tags)
        .filter_map(|candidate| {
            let key = normalize_keyword(&candidate);
            // `insert` returns false when the key was already present.
            if key.is_empty() || !seen.insert(key) {
                None
            } else {
                Some(candidate.trim().to_string())
            }
        })
        .collect()
}