use core::fmt::Write as _;
use crate::cst::builder::{
document_boundaries, parse_full, parse_subtree, rebuild_with_splice, SubtreeContext,
};
use crate::cst::green::{GreenChild, GreenNode};
use crate::cst::syntax::SyntaxKind;
use crate::error::{Error, Result};
use crate::path::{parse_query_path, QuerySegment};
use crate::prelude::*;
use crate::span_context::SpanTree;
use crate::value::{Number, Value};
/// An editable document: the source text, the green syntax tree built from it,
/// and a lazily populated cache of the parsed value with its span tree.
#[derive(Debug)]
pub struct Document {
/// Current source text of the document.
source: Arc<str>,
/// Root of the green syntax tree describing `source`.
green: GreenNode,
/// Parsed value and span tree for `source`; `None` until it is (re)computed.
cache: core::cell::RefCell<Option<(Value, SpanTree)>>,
/// Scope recorded by the most recent successful `replace_span`, if any.
last_repair_scope: core::cell::Cell<Option<RepairScope>>,
}
impl Clone for Document {
    /// Builds a copy that shares the source `Arc` and carries its own copies of
    /// the green tree, the cached parse (if any) and the last repair scope.
    fn clone(&self) -> Self {
        let cached = self.cache.borrow().clone();
        let scope = self.last_repair_scope.get();
        Self {
            source: Arc::clone(&self.source),
            green: self.green.clone(),
            cache: core::cell::RefCell::new(cached),
            last_repair_scope: core::cell::Cell::new(scope),
        }
    }
}
/// How much of the tree the last `replace_span` had to rebuild.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepairScope {
/// A single scalar (not produced by any code visible in this file).
Scalar,
/// One mapping entry or sequence item was re-parsed.
Entry,
/// One whole mapping or sequence was re-parsed.
Collection,
/// The whole document was re-parsed.
Document,
}
impl Document {
/// Returns the root of the document's green syntax tree.
#[must_use]
pub fn syntax(&self) -> &GreenNode {
&self.green
}
/// Borrows the parsed [`Value`] of the document, parsing the source first if
/// nothing is cached yet.
///
/// # Panics
/// Panics if the source no longer parses (see `ensure_cache`).
#[must_use]
pub fn as_value(&self) -> core::cell::Ref<'_, Value> {
    self.ensure_cache();
    let guard = self.cache.borrow();
    core::cell::Ref::map(guard, |slot| {
        let (value, _) = slot.as_ref().expect("ensure_cache populated");
        value
    })
}
/// Returns the current source text of the document.
#[must_use]
pub fn source(&self) -> &str {
&self.source
}
/// Resolves `path` to the byte range of its value, with trailing blanks and
/// line breaks trimmed off the end.
///
/// The green tree is consulted first; only when it cannot resolve the path is
/// the cached value / span tree used. Returns `None` when neither resolves it.
#[must_use]
pub fn span_at(&self, path: &str) -> Option<(usize, usize)> {
    let segments = parse_query_path(path);
    let (start, end) = match resolve_path_in_green(&self.green, &segments, &self.source) {
        Some(span) => span,
        None => {
            self.ensure_cache();
            let guard = self.cache.borrow();
            let (value, span_tree) = guard.as_ref().expect("ensure_cache populated");
            resolve_span(value, span_tree, &segments)?
        }
    };
    Some(trim_trailing_blank(&self.source, start, end))
}
/// Fills the parse cache from the current source when it is empty.
///
/// # Panics
/// Panics if the source fails to parse; callers rely on every edit leaving a
/// parseable document behind.
fn ensure_cache(&self) {
    // Check with a shared borrow first so an already-populated cache never
    // needs a mutable borrow.
    let already_cached = self.cache.borrow().is_some();
    if !already_cached {
        let cfg = crate::parser::ParseConfig::default();
        let parsed = crate::parser::parse_one(&self.source, &cfg)
            .expect("Document source must always parse — local repair invariant violated");
        *self.cache.borrow_mut() = Some(parsed);
    }
}
/// Checks that the current source parses, caching the result on success.
///
/// # Errors
/// Returns the parser's error when the source does not parse. A populated
/// cache short-circuits to `Ok(())`.
pub fn validate(&self) -> Result<()> {
    if self.cache.borrow().is_none() {
        let cfg = crate::parser::ParseConfig::default();
        let parsed = crate::parser::parse_one(&self.source, &cfg)?;
        *self.cache.borrow_mut() = Some(parsed);
    }
    Ok(())
}
/// Returns the source text of the value at `path`, or `None` when the path
/// does not resolve.
#[must_use]
pub fn get(&self, path: &str) -> Option<&str> {
    self.span_at(path)
        .map(|(start, end)| &self.source[start..end])
}
/// Replaces the bytes `start..end` of the source with `replacement`.
///
/// A local repair of the green tree is attempted first; if it succeeds the
/// parse cache is cleared and the repair scope recorded. Otherwise the whole
/// edited text is re-parsed and the scope is [`RepairScope::Document`].
///
/// # Errors
/// Returns `Error::Parse` when the range is reversed, out of bounds, or does
/// not fall on character boundaries, and propagates the error of the full
/// re-parse when the edited text is not valid. The document is left unchanged
/// on error.
pub fn replace_span(&mut self, start: usize, end: usize, replacement: &str) -> Result<()> {
    if start > end || end > self.source.len() {
        return Err(Error::Parse(format!(
            "replace_span range {start}..{end} out of bounds (source length {})",
            self.source.len()
        )));
    }
    if !self.source.is_char_boundary(start) || !self.source.is_char_boundary(end) {
        return Err(Error::Parse(format!(
            "replace_span range {start}..{end} is not a character boundary"
        )));
    }
    let mut new_source =
        String::with_capacity(self.source.len() - (end - start) + replacement.len());
    new_source.push_str(&self.source[..start]);
    new_source.push_str(replacement);
    new_source.push_str(&self.source[end..]);

    let repaired = self.try_local_repair_green(start, end, replacement, &new_source);
    if let Some((new_green, scope)) = repaired {
        self.last_repair_scope.set(Some(scope));
        // Build the shared source only here: the full re-parse path below
        // produces its own `Arc`, so an eager copy would be thrown away.
        self.source = Arc::from(new_source);
        self.green = new_green;
        // The cached value/span tree describes the old text.
        let _ = self.cache.replace(None);
        return Ok(());
    }

    let parsed = parse_full(&new_source)?;
    self.last_repair_scope.set(Some(RepairScope::Document));
    self.source = parsed.source;
    self.green = parsed.green;
    let _ = self.cache.replace(Some((parsed.value, parsed.span_tree)));
    Ok(())
}
/// Tries to re-parse only the smallest enclosing node affected by an edit.
///
/// `start..end` is the replaced range in the old source, `replacement` the
/// inserted text and `new_source` the already-edited text. Returns the new
/// green root and the scope of the repaired node, or `None` when no candidate
/// could be repaired locally.
fn try_local_repair_green(
&self,
start: usize,
end: usize,
replacement: &str,
new_source: &str,
) -> Option<(GreenNode, RepairScope)> {
// Edits touching anchor/alias/tag tokens, or inserting `&`, `*` or `!`,
// are never repaired locally.
if region_has_anchor_alias_or_tag(&self.green, start, end)
|| replacement_introduces_anchor_alias_or_tag(replacement)
{
return None;
}
// Change in byte length caused by the edit.
let delta = replacement.len() as isize - (end as isize - start as isize);
// Nodes containing the edited range, innermost first.
let candidates = ancestor_candidates(&self.green, start, end);
for cand in &candidates {
if !is_phase_a_repairable(cand.kind) {
continue;
}
let n_old_start = cand.start;
let n_old_end = cand.end;
// The node keeps its start; only its end shifts by `delta`.
let n_new_start = n_old_start; let n_new_end_signed = n_old_end as isize + delta;
if n_new_end_signed < n_new_start as isize {
continue;
}
let n_new_end = n_new_end_signed as usize;
if n_new_end > new_source.len() {
continue;
}
let fragment = &new_source[n_new_start..n_new_end];
// Parse context is the indentation of the line the node starts on.
let indent = entry_indent_column(&self.source, n_old_start);
let ctx = SubtreeContext::block_at(indent);
match parse_subtree(fragment, ctx, cand.kind) {
// Accept only a subtree of the same kind that covers the whole fragment.
Ok(new_sub)
if new_sub.kind() == cand.kind && new_sub.text_len() == fragment.len() =>
{
let new_root =
rebuild_with_splice(&self.green, n_old_start, n_old_end, new_sub);
return Some((new_root, scope_for_kind(cand.kind)));
}
// Otherwise try the next (larger) candidate.
Ok(_) | Err(_) => {
continue;
}
}
}
None
}
/// Returns the scope recorded by the most recent successful `replace_span`,
/// or `None` if none has been recorded.
#[must_use]
pub fn last_repair_scope(&self) -> Option<RepairScope> {
self.last_repair_scope.get()
}
pub fn set(&mut self, path: &str, fragment: &str) -> Result<()> {
let (s, e) = self
.span_at(path)
.ok_or_else(|| Error::Parse(format!("path not found: {path}")))?;
self.replace_span(s, e, fragment)
}
pub fn set_value(&mut self, path: &str, value: &Value) -> Result<()> {
let (s, e) = self
.span_at(path)
.ok_or_else(|| Error::Parse(format!("path not found: {path}")))?;
let kind = leaf_kind_at(&self.green, s).ok_or_else(|| {
Error::Parse("could not locate green-tree leaf at target span".into())
})?;
let neighbour = sibling_dominant_scalar_kind(&self.green, s)
.filter(|_| kind == SyntaxKind::PlainScalar);
let entry_col = entry_indent_column(&self.source, s);
let ctx = SiteContext {
kind,
neighbour,
entry_col,
};
let fragment = format_value_for_site(value, &ctx)?;
self.replace_span(s, e, &fragment)
}
/// Removes the single-line entry at `path` together with its line.
///
/// # Errors
/// Propagates the errors of `entry_line_span` (unknown path, only entry of a
/// collection, multi-line entry) and of `replace_span`.
pub fn remove(&mut self, path: &str) -> Result<()> {
    self.ensure_cache();
    let segments = parse_query_path(path);
    // Keep the cache borrow scoped so it is released before the edit.
    let located = {
        let guard = self.cache.borrow();
        let (value, span_tree) = guard.as_ref().expect("ensure_cache populated");
        entry_line_span(value, span_tree, &self.source, &segments)
    };
    let (line_start, line_end) = located?;
    self.replace_span(line_start, line_end, "")
}
/// Appends `fragment` as a new item after the last item of the block
/// sequence at `path`, reusing the column of the last item's `-`.
///
/// # Errors
/// Returns `Error::Parse` when the path is missing or not a sequence, when
/// the sequence is empty, when the last item has no `-` before it on its line
/// (not a block sequence), or when `replace_span` fails.
pub fn push_back(&mut self, path: &str, fragment: &str) -> Result<()> {
    self.ensure_cache();
    let seq_len = {
        let cache = self.cache.borrow();
        let (value, _) = cache.as_ref().expect("ensure_cache populated");
        let target = path_value(value, path)
            .ok_or_else(|| Error::Parse(format!("path not found: {path}")))?;
        match target {
            Value::Sequence(s) => s.len(),
            _ => {
                return Err(Error::Parse(
                    "push_back: target path is not a sequence".into(),
                ));
            }
        }
    };
    if seq_len == 0 {
        return Err(Error::Parse(
            "push_back: empty sequence has no anchor for indentation — use `set` with a fragment instead"
                .into(),
        ));
    }
    let item_path = format!("{path}[{}]", seq_len - 1);
    let (last_start, last_end) = self
        .span_at(&item_path)
        .ok_or_else(|| Error::Parse("push_back: could not resolve last item span".into()))?;
    let dash_col = column_of_preceding_dash(&self.source, last_start).ok_or_else(|| {
        Error::Parse(
            "push_back: only block sequences are supported (no `-` anchor before last item)"
                .into(),
        )
    })?;
    let line_end = end_of_line(&self.source, last_end);
    // When the last line is not newline-terminated (source without a trailing
    // line break), the new item must start on a fresh line; otherwise it
    // would be glued onto the previous item's text.
    let needs_break = line_end
        .checked_sub(1)
        .and_then(|idx| self.source.as_bytes().get(idx))
        .map_or(false, |&b| b != b'\n');
    let indent: String = " ".repeat(dash_col);
    let mut new_line = format!("{indent}- {fragment}\n");
    if needs_break {
        new_line.insert(0, '\n');
    }
    self.replace_span(line_end, line_end, &new_line)
}
/// Returns the indentation step detected in the source by
/// `detect_indent_unit` (2 when no step can be detected).
#[must_use]
pub fn indent_unit(&self) -> usize {
detect_indent_unit(&self.source)
}
/// Returns the quoting style that `detect_dominant_quote_style` finds most
/// common among the quoted scalars of the green tree.
#[must_use]
pub fn dominant_quote_style(&self) -> crate::ScalarStyle {
detect_dominant_quote_style(&self.green)
}
/// Returns the collection style that `detect_dominant_flow_style` derives
/// from the block and flow collection counts of the green tree.
#[must_use]
pub fn dominant_flow_style(&self) -> crate::FlowStyle {
detect_dominant_flow_style(&self.green)
}
/// Inserts `key: fragment` into the mapping at `mapping_path` (the root
/// mapping when the path is empty).
///
/// If the key already exists its value is replaced via `set`. Otherwise the
/// entry is added after the last existing entry, at that entry's key column.
/// A multi-line `fragment` is placed on the lines below the key, each
/// non-empty line indented one detected indent unit deeper than the key.
///
/// # Errors
/// Returns `Error::Parse` when the path is missing or not a mapping, when the
/// mapping is empty, when the last entry cannot be located, or when the edit
/// itself fails.
pub fn insert_entry(&mut self, mapping_path: &str, key: &str, fragment: &str) -> Result<()> {
    let child_path = if mapping_path.is_empty() {
        key.to_owned()
    } else {
        format!("{mapping_path}.{key}")
    };
    if self.span_at(&child_path).is_some() {
        return self.set(&child_path, fragment);
    }
    self.ensure_cache();
    let last_key: String = {
        let cache = self.cache.borrow();
        let (value, _) = cache.as_ref().expect("ensure_cache populated");
        let target = if mapping_path.is_empty() {
            value
        } else {
            path_value(value, mapping_path)
                .ok_or_else(|| Error::Parse(format!("path not found: {mapping_path}")))?
        };
        let mapping = match target {
            Value::Mapping(m) => m,
            _ => {
                return Err(Error::Parse(
                    "insert_entry: target path is not a mapping".into(),
                ));
            }
        };
        if mapping.is_empty() {
            return Err(Error::Parse(
                "insert_entry: empty mapping has no anchor for indentation — \
                 use `set` with a fragment instead"
                    .into(),
            ));
        }
        mapping
            .iter()
            .last()
            .map(|(k, _)| k.clone())
            .expect("non-empty mapping has a last entry")
    };
    let last_path = if mapping_path.is_empty() {
        last_key
    } else {
        format!("{mapping_path}.{last_key}")
    };
    let (last_value_start, last_value_end) = self.span_at(&last_path).ok_or_else(|| {
        Error::Parse("insert_entry: could not resolve last entry span".into())
    })?;
    let key_col = column_of_key_at(&self.source, last_value_start).ok_or_else(|| {
        Error::Parse("insert_entry: could not locate last key's column for indentation".into())
    })?;
    let line_end = end_of_line(&self.source, last_value_end);
    // When the last line is not newline-terminated (source without a trailing
    // line break), the new entry must start on a fresh line; otherwise it
    // would be glued onto the previous entry's value.
    let needs_break = line_end
        .checked_sub(1)
        .and_then(|idx| self.source.as_bytes().get(idx))
        .map_or(false, |&b| b != b'\n');
    let indent: String = " ".repeat(key_col);
    let mut new_line = if fragment.contains('\n') {
        let unit = detect_indent_unit(&self.source);
        let inner_indent: String = " ".repeat(key_col + unit);
        let body = fragment.trim_start_matches('\n');
        let mut buf = format!("{indent}{key}:\n");
        for line in body.split('\n') {
            // Blank lines stay blank instead of receiving trailing spaces.
            if !line.is_empty() {
                buf.push_str(&inner_indent);
                buf.push_str(line);
            }
            buf.push('\n');
        }
        buf
    } else {
        format!("{indent}{key}: {fragment}\n")
    };
    if needs_break {
        new_line.insert(0, '\n');
    }
    self.replace_span(line_end, line_end, &new_line)
}
/// Inserts `fragment` as a new block-sequence item directly after the item
/// at `item_path`, reusing the column of that item's `-`.
///
/// # Errors
/// Returns `Error::Parse` when the path does not end in a sequence index,
/// does not resolve, is not preceded by `-` on its line (not a block
/// sequence), or when the edit itself fails.
pub fn insert_after(&mut self, item_path: &str, fragment: &str) -> Result<()> {
    let segments = parse_query_path(item_path);
    if !matches!(segments.last(), Some(QuerySegment::Index(_))) {
        return Err(Error::Parse(
            "insert_after: path must end with a sequence index, e.g. `items[2]`".into(),
        ));
    }
    let (item_start, item_end) = self
        .span_at(item_path)
        .ok_or_else(|| Error::Parse(format!("path not found: {item_path}")))?;
    let dash_col = column_of_preceding_dash(&self.source, item_start).ok_or_else(|| {
        Error::Parse(
            "insert_after: only block sequences are supported (no `-` anchor before item)"
                .into(),
        )
    })?;
    let line_end = end_of_line(&self.source, item_end);
    // When the item's line is not newline-terminated (source without a
    // trailing line break), the new item must start on a fresh line;
    // otherwise it would be glued onto the previous item's text.
    let needs_break = line_end
        .checked_sub(1)
        .and_then(|idx| self.source.as_bytes().get(idx))
        .map_or(false, |&b| b != b'\n');
    let indent: String = " ".repeat(dash_col);
    let mut new_line = format!("{indent}- {fragment}\n");
    if needs_break {
        new_line.insert(0, '\n');
    }
    self.replace_span(line_end, line_end, &new_line)
}
}
impl fmt::Display for Document {
    /// Writes the text that the green tree produces for the current source.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = self.green.text(&self.source);
        f.write_str(&rendered)
    }
}
/// Parses `input` into a [`Document`] whose value/span cache is already
/// populated and which has no repair scope recorded yet.
///
/// # Errors
/// Propagates the error of `parse_full`.
pub fn parse_document(input: &str) -> Result<Document> {
    let parsed = parse_full(input)?;
    let cache = core::cell::RefCell::new(Some((parsed.value, parsed.span_tree)));
    Ok(Document {
        source: parsed.source,
        green: parsed.green,
        cache,
        last_repair_scope: core::cell::Cell::new(None),
    })
}
/// Parses every document of a multi-document stream.
///
/// With at most one boundary the whole input is parsed as a single document.
/// Otherwise each non-empty boundary range is parsed on its own, in order.
///
/// # Errors
/// Propagates the first error from `document_boundaries` or `parse_document`.
pub fn parse_stream(input: &str) -> Result<Vec<Document>> {
    let bounds = document_boundaries(input)?;
    if bounds.len() <= 1 {
        return parse_document(input).map(|doc| vec![doc]);
    }
    bounds
        .into_iter()
        .filter(|&(start, end)| start != end)
        .map(|(start, end)| parse_document(&input[start..end]))
        .collect()
}
/// Maps the kind of a repaired node to the [`RepairScope`] reported to callers:
/// entries and items are `Entry`, the four collection kinds are `Collection`,
/// everything else is `Document`.
fn scope_for_kind(kind: SyntaxKind) -> RepairScope {
    let is_entry = matches!(kind, SyntaxKind::MappingEntry | SyntaxKind::SequenceItem);
    let is_collection = matches!(
        kind,
        SyntaxKind::BlockMapping
            | SyntaxKind::BlockSequence
            | SyntaxKind::FlowMapping
            | SyntaxKind::FlowSequence
    );
    if is_entry {
        RepairScope::Entry
    } else if is_collection {
        RepairScope::Collection
    } else {
        RepairScope::Document
    }
}
/// Reports whether a node of this kind may be re-parsed on its own: block
/// mappings, block sequences, mapping entries and sequence items.
fn is_phase_a_repairable(kind: SyntaxKind) -> bool {
    match kind {
        SyntaxKind::BlockMapping
        | SyntaxKind::BlockSequence
        | SyntaxKind::MappingEntry
        | SyntaxKind::SequenceItem => true,
        _ => false,
    }
}
/// A node that fully contains an edited range, described by its kind and its
/// absolute byte range in the source.
struct Candidate {
/// Syntax kind of the node.
kind: SyntaxKind,
/// Absolute start offset of the node.
start: usize,
/// Absolute end offset of the node (exclusive).
end: usize,
}
/// Lists the nodes that contain `start..end`, innermost node first.
fn ancestor_candidates(root: &GreenNode, start: usize, end: usize) -> Vec<Candidate> {
    let mut outermost_first = Vec::new();
    collect_ancestors(root, start, end, 0, &mut outermost_first);
    outermost_first.into_iter().rev().collect()
}
/// Pushes `node` (located at absolute offset `base`) and then, recursively,
/// the chain of child nodes that still contain `start..end` onto `out`.
///
/// Only the first child whose range contains the edit is followed; the descent
/// stops when that child is a token.
fn collect_ancestors(
    node: &GreenNode,
    start: usize,
    end: usize,
    base: usize,
    out: &mut Vec<Candidate>,
) {
    let node_end = base + node.text_len();
    if start < base || end > node_end {
        return;
    }
    out.push(Candidate {
        kind: node.kind(),
        start: base,
        end: node_end,
    });
    let mut offset = base;
    for child in node.children() {
        let next = offset + child.text_len();
        if offset <= start && end <= next {
            if let GreenChild::Node(inner) = child {
                collect_ancestors(inner, start, end, offset, out);
            }
            return;
        }
        offset = next;
    }
}
/// Reports whether any anchor, alias or tag token overlaps `start..end`.
///
/// A token overlaps when it starts before `end` and ends after `start`, so an
/// empty range never overlaps anything.
fn region_has_anchor_alias_or_tag(root: &GreenNode, start: usize, end: usize) -> bool {
    let mut hit = false;
    walk_tokens(root, 0, &mut |kind, range| {
        let overlaps = range.start < end && range.end > start;
        let is_property = matches!(
            kind,
            SyntaxKind::AnchorMark | SyntaxKind::AliasMark | SyntaxKind::TagMark
        );
        if overlaps && is_property {
            hit = true;
        }
    });
    hit
}
/// Calls `visit` with the kind and absolute byte range of every token below
/// `node`, in document order. `base` is the absolute offset of `node`.
fn walk_tokens(
    node: &GreenNode,
    base: usize,
    visit: &mut dyn FnMut(SyntaxKind, core::ops::Range<usize>),
) {
    let mut offset = base;
    for child in node.children() {
        let next = offset + child.text_len();
        match child {
            GreenChild::Node(inner) => walk_tokens(inner, offset, visit),
            GreenChild::Token { kind, .. } => visit(*kind, offset..next),
        }
        offset = next;
    }
}
/// Reports whether `replacement` contains `&`, `*` or `!` anywhere, i.e. a
/// character that could start an anchor, alias or tag.
fn replacement_introduces_anchor_alias_or_tag(replacement: &str) -> bool {
    replacement
        .chars()
        .any(|c| c == '&' || c == '*' || c == '!')
}
/// Resolves `segments` against the first collection node directly below
/// `root`, returning the untrimmed byte range of the addressed value.
fn resolve_path_in_green(
    root: &GreenNode,
    segments: &[QuerySegment],
    source: &str,
) -> Option<(usize, usize)> {
    first_collection_child(root, 0)
        .and_then(|(collection, base)| walk_path(collection, segments, base, source))
}
/// Finds the first direct child of `node` that is a block/flow mapping or
/// sequence and returns it with its absolute offset.
fn first_collection_child(node: &GreenNode, base: usize) -> Option<(&GreenNode, usize)> {
    let mut offset = base;
    for child in node.children() {
        if let GreenChild::Node(inner) = child {
            let is_collection = matches!(
                inner.kind(),
                SyntaxKind::BlockMapping
                    | SyntaxKind::BlockSequence
                    | SyntaxKind::FlowMapping
                    | SyntaxKind::FlowSequence
            );
            if is_collection {
                return Some((inner, offset));
            }
        }
        offset += child.text_len();
    }
    None
}
/// Follows `segments` downwards from `node` (at absolute offset `base`).
///
/// With no segments left the node's own range is returned. A key segment is
/// only followed into a mapping and an index segment only into a sequence;
/// any other combination yields `None`.
fn walk_path(
    node: &GreenNode,
    segments: &[QuerySegment],
    base: usize,
    source: &str,
) -> Option<(usize, usize)> {
    match segments.split_first() {
        None => Some((base, base + node.text_len())),
        Some((head, tail)) => match (head, node.kind()) {
            (QuerySegment::Key(k), SyntaxKind::BlockMapping | SyntaxKind::FlowMapping) => {
                walk_mapping(node, k, tail, base, source)
            }
            (QuerySegment::Index(i), SyntaxKind::BlockSequence | SyntaxKind::FlowSequence) => {
                walk_sequence(node, *i, tail, base, source)
            }
            _ => None,
        },
    }
}
/// Looks for the first mapping entry of `node` whose key text equals `key`
/// and resolves the rest of the path inside that entry's value.
fn walk_mapping(
    node: &GreenNode,
    key: &str,
    tail: &[QuerySegment],
    base: usize,
    source: &str,
) -> Option<(usize, usize)> {
    let mut offset = base;
    for child in node.children() {
        if let GreenChild::Node(entry) = child {
            // Entries whose key cannot be read as text never match.
            let is_match = entry.kind() == SyntaxKind::MappingEntry
                && entry_key_text(entry, source, offset).map_or(false, |found| found == key);
            if is_match {
                return resolve_value_in_entry(entry, offset, tail, source);
            }
        }
        offset += child.text_len();
    }
    None
}
/// Picks the `target_index`-th sequence item of `node` (counting only
/// `SequenceItem` children) and resolves the rest of the path inside it.
fn walk_sequence(
    node: &GreenNode,
    target_index: usize,
    tail: &[QuerySegment],
    base: usize,
    source: &str,
) -> Option<(usize, usize)> {
    let mut offset = base;
    let mut remaining = target_index;
    for child in node.children() {
        if let GreenChild::Node(item) = child {
            if item.kind() == SyntaxKind::SequenceItem {
                if remaining == 0 {
                    return resolve_value_in_item(item, offset, tail, source);
                }
                remaining -= 1;
            }
        }
        offset += child.text_len();
    }
    None
}
/// Reads the key of a mapping entry as text.
///
/// Leading `?`, whitespace, newlines, comments, anchors and tags are skipped.
/// The first other token decides the result: a plain scalar is returned
/// verbatim, a single-quoted scalar is decoded, anything else gives `None`.
/// A child node before the key also gives `None`.
fn entry_key_text<'s>(entry: &GreenNode, source: &'s str, base: usize) -> Option<Cow<'s, str>> {
    let mut offset = base;
    for child in entry.children() {
        let (kind, token_len) = match child {
            GreenChild::Token { kind, len } => (kind, *len as usize),
            GreenChild::Node(_) => return None,
        };
        let raw = offset..offset + token_len;
        match kind {
            SyntaxKind::PlainScalar => return Some(Cow::Borrowed(&source[raw])),
            SyntaxKind::SingleQuotedScalar => return decode_single_quoted(&source[raw]),
            SyntaxKind::QuestionIndicator
            | SyntaxKind::Whitespace
            | SyntaxKind::Newline
            | SyntaxKind::Comment
            | SyntaxKind::AnchorMark
            | SyntaxKind::TagMark => {}
            _ => return None,
        }
        offset += child.text_len();
    }
    None
}
/// Strips the surrounding single quotes from `raw` and collapses each `''`
/// into `'`. Returns `None` when `raw` is not wrapped in single quotes.
/// Borrows from `raw` when nothing needs unescaping.
fn decode_single_quoted(raw: &str) -> Option<Cow<'_, str>> {
    let body = raw
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''))?;
    let decoded = if body.contains('\'') {
        Cow::Owned(body.replace("''", "'"))
    } else {
        Cow::Borrowed(body)
    };
    Some(decoded)
}
/// Resolves `tail` inside the value of a mapping entry located at `base`.
///
/// With an empty tail the value's own range is returned; otherwise the value
/// must be a node and the path continues inside it.
fn resolve_value_in_entry(
    entry: &GreenNode,
    base: usize,
    tail: &[QuerySegment],
    source: &str,
) -> Option<(usize, usize)> {
    let (_, span, nested) = entry_value(entry, base)?;
    if tail.is_empty() {
        Some(span)
    } else {
        walk_path(nested?, tail, span.0, source)
    }
}
/// Resolves `tail` inside the value of a sequence item located at `base`.
///
/// With an empty tail the value's own range is returned; otherwise the value
/// must be a node and the path continues inside it.
fn resolve_value_in_item(
    item: &GreenNode,
    base: usize,
    tail: &[QuerySegment],
    source: &str,
) -> Option<(usize, usize)> {
    let (_, span, nested) = item_value(item, base)?;
    match (tail.is_empty(), nested) {
        (true, _) => Some(span),
        (false, Some(node)) => walk_path(node, tail, span.0, source),
        (false, None) => None,
    }
}
/// Locates the value part of a mapping entry whose first byte is at `base`.
///
/// Returns the value's kind, its absolute byte range and, when the value is a
/// node, that node. Anchor/tag/alias tokens directly preceding the value are
/// included in the range. Returns `None` when nothing follows the colon.
fn entry_value(
entry: &GreenNode,
base: usize,
) -> Option<(SyntaxKind, (usize, usize), Option<&GreenNode>)> {
let mut pos = base;
let mut after_colon = false;
// Offset of the first property token (anchor/tag/alias) seen after the colon.
let mut prefix_start: Option<usize> = None;
for child in entry.children() {
let len = child.text_len();
let child_start = pos;
let child_end = pos + len;
match child {
GreenChild::Token { kind, .. } => {
if !after_colon {
// Everything before the colon belongs to the key.
if *kind == SyntaxKind::ColonIndicator {
after_colon = true;
}
} else if is_value_property_kind(*kind) {
let _ = prefix_start.get_or_insert(child_start);
} else if !is_trivia_kind(*kind) {
// First non-trivia token after the colon is the scalar value.
let start = prefix_start.unwrap_or(child_start);
return Some((*kind, (start, child_end), None));
}
}
GreenChild::Node(inner) => {
// Nodes before the colon are skipped; the first one after it is the value.
if after_colon {
let start = prefix_start.unwrap_or(child_start);
return Some((inner.kind(), (start, child_end), Some(inner)));
}
}
}
pos += len;
}
// Only property tokens followed the colon: report them, up to the end of the
// entry, as a plain-scalar-kinded value.
prefix_start.map(|start| (SyntaxKind::PlainScalar, (start, pos), None))
}
/// Locates the value part of a sequence item whose first byte is at `base`.
///
/// Returns the value's kind, its absolute byte range and, when the value is a
/// node, that node. Anchor/tag/alias tokens directly preceding the value are
/// included in the range. Returns `None` when nothing follows the dash.
fn item_value(
item: &GreenNode,
base: usize,
) -> Option<(SyntaxKind, (usize, usize), Option<&GreenNode>)> {
let mut pos = base;
let mut after_dash = false;
// Offset of the first property token (anchor/tag/alias) seen after the dash.
let mut prefix_start: Option<usize> = None;
for child in item.children() {
let len = child.text_len();
let child_start = pos;
let child_end = pos + len;
match child {
GreenChild::Token { kind, .. } => {
if !after_dash {
// Tokens before the dash are ignored.
if *kind == SyntaxKind::DashIndicator {
after_dash = true;
}
} else if is_value_property_kind(*kind) {
let _ = prefix_start.get_or_insert(child_start);
} else if !is_trivia_kind(*kind) {
// First non-trivia token after the dash is the scalar value.
let start = prefix_start.unwrap_or(child_start);
return Some((*kind, (start, child_end), None));
}
}
GreenChild::Node(inner) => {
// Nodes before the dash are skipped; the first one after it is the value.
if after_dash {
let start = prefix_start.unwrap_or(child_start);
return Some((inner.kind(), (start, child_end), Some(inner)));
}
}
}
pos += len;
}
// Only property tokens followed the dash: report them, up to the end of the
// item, as a plain-scalar-kinded value.
prefix_start.map(|start| (SyntaxKind::PlainScalar, (start, pos), None))
}
/// Reports whether `k` is skipped when looking for a value token:
/// whitespace, newline, comment, BOM or directive.
fn is_trivia_kind(k: SyntaxKind) -> bool {
    match k {
        SyntaxKind::Whitespace
        | SyntaxKind::Newline
        | SyntaxKind::Comment
        | SyntaxKind::Bom
        | SyntaxKind::Directive => true,
        _ => false,
    }
}
/// Reports whether `k` is an anchor, tag or alias mark, i.e. a token that is
/// counted as part of the value it precedes.
fn is_value_property_kind(k: SyntaxKind) -> bool {
    match k {
        SyntaxKind::AnchorMark | SyntaxKind::TagMark | SyntaxKind::AliasMark => true,
        _ => false,
    }
}
/// Shrinks `start..end` from the right past spaces, tabs, `\n` and `\r`.
/// The start is never moved and the end never goes below it.
fn trim_trailing_blank(source: &str, start: usize, mut end: usize) -> (usize, usize) {
    let bytes = source.as_bytes();
    while end > start && matches!(bytes[end - 1], b' ' | b'\t' | b'\n' | b'\r') {
        end -= 1;
    }
    (start, end)
}
/// Resolves `segments` against a value and its parallel span tree, returning
/// the span recorded for the addressed node.
///
/// Key segments are matched against mapping keys in order (first match wins);
/// index segments must be in range of both the sequence and its span list.
fn resolve_span(
    value: &Value,
    span_tree: &SpanTree,
    segments: &[QuerySegment],
) -> Option<(usize, usize)> {
    let (head, tail) = match segments.split_first() {
        Some(parts) => parts,
        None => {
            // End of the path: report this node's own span.
            let span = match span_tree {
                SpanTree::Leaf(s, e) => (*s, *e),
                SpanTree::Sequence { start, end, .. } | SpanTree::Mapping { start, end, .. } => {
                    (*start, *end)
                }
            };
            return Some(span);
        }
    };
    match (head, value, span_tree) {
        (QuerySegment::Key(k), Value::Mapping(m), SpanTree::Mapping { entries, .. }) => {
            for ((mk, mv), (_, child_tree)) in m.iter().zip(entries.iter()) {
                if mk == k {
                    return resolve_span(mv, child_tree, tail);
                }
            }
            None
        }
        (QuerySegment::Index(i), Value::Sequence(seq), SpanTree::Sequence { items, .. }) => {
            let child_value = seq.get(*i)?;
            let child_tree = items.get(*i)?;
            resolve_span(child_value, child_tree, tail)
        }
        _ => None,
    }
}
/// Finds the full-line byte range of the entry addressed by `segments`, for
/// use by `remove`.
///
/// All segments but the last are descended recursively; the last one selects
/// a mapping entry or sequence item, whose line range (including the trailing
/// newline, if any) is returned.
///
/// Errors with `Error::Parse` when the path is empty or does not resolve, when
/// the target is the only entry of its collection, when a sequence item has no
/// `-` before it on its line, or when the entry spans more than one line.
fn entry_line_span(
value: &Value,
span_tree: &SpanTree,
source: &str,
segments: &[QuerySegment],
) -> Result<(usize, usize)> {
if segments.is_empty() {
return Err(Error::Parse(
"remove requires a non-empty path (cannot remove the document root)".into(),
));
}
let (head, tail) = segments
.split_first()
.ok_or_else(|| Error::Parse("path not found".into()))?;
// More segments follow: step into the child addressed by `head` and recurse.
if !tail.is_empty() {
let (child_value, child_tree) = match (head, value, span_tree) {
(QuerySegment::Key(k), Value::Mapping(m), SpanTree::Mapping { entries, .. }) => {
let pos = m
.iter()
.position(|(mk, _)| mk == k)
.ok_or_else(|| Error::Parse(format!("path not found: missing key {k:?}")))?;
(
m.iter().nth(pos).map(|(_, v)| v).expect("pos in range"),
&entries[pos].1,
)
}
(QuerySegment::Index(i), Value::Sequence(seq), SpanTree::Sequence { items, .. }) => (
seq.get(*i).ok_or_else(|| {
Error::Parse(format!("path not found: index {i} out of bounds"))
})?,
items.get(*i).ok_or_else(|| {
Error::Parse(format!("path not found: index {i} out of bounds"))
})?,
),
_ => return Err(Error::Parse("path not found".into())),
};
return entry_line_span(child_value, child_tree, source, tail);
}
// Last segment: `head` names the entry to remove.
match (head, value, span_tree) {
(QuerySegment::Key(k), Value::Mapping(m), SpanTree::Mapping { entries, .. }) => {
if m.len() <= 1 {
return Err(Error::Parse(
"remove cannot delete the only entry of a mapping".into(),
));
}
let pos = m
.iter()
.position(|(mk, _)| mk == k)
.ok_or_else(|| Error::Parse(format!("path not found: missing key {k:?}")))?;
let ((key_start, _key_end), child_tree) = &entries[pos];
let raw_value_end = match child_tree {
SpanTree::Leaf(_, e) => *e,
SpanTree::Sequence { end, .. } | SpanTree::Mapping { end, .. } => *end,
};
// The entry runs from the key start to the value end without trailing blanks.
let (_, value_end) = trim_trailing_blank(source, *key_start, raw_value_end);
require_single_line(source, *key_start, value_end)?;
Ok(line_extent(source, *key_start, value_end))
}
(QuerySegment::Index(i), Value::Sequence(seq), SpanTree::Sequence { items, .. }) => {
if seq.len() <= 1 {
return Err(Error::Parse(
"remove cannot delete the only entry of a sequence".into(),
));
}
let item_tree = items
.get(*i)
.ok_or_else(|| Error::Parse(format!("path not found: index {i} out of bounds")))?;
let (value_start, raw_value_end) = match item_tree {
SpanTree::Leaf(s, e) => (*s, *e),
SpanTree::Sequence { start, end, .. } | SpanTree::Mapping { start, end, .. } => {
(*start, *end)
}
};
let (_, value_end) = trim_trailing_blank(source, value_start, raw_value_end);
// The item runs from its `-` indicator to the trimmed value end.
let dash_pos = locate_preceding_dash(source, value_start).ok_or_else(|| {
Error::Parse(
"remove: could not locate '-' indicator preceding sequence item".into(),
)
})?;
require_single_line(source, dash_pos, value_end)?;
Ok(line_extent(source, dash_pos, value_end))
}
_ => Err(Error::Parse("path not found".into())),
}
}
/// Follows the parsed query `path` through `value` and returns the addressed
/// sub-value, or `None` as soon as a segment does not apply (missing key,
/// index out of range, or wrong collection type).
fn path_value<'a>(value: &'a Value, path: &str) -> Option<&'a Value> {
    let segments = parse_query_path(path);
    let mut current = value;
    for segment in &segments {
        current = match (segment, current) {
            (QuerySegment::Key(k), Value::Mapping(m)) => {
                m.iter().find(|(mk, _)| *mk == k).map(|(_, v)| v)?
            }
            (QuerySegment::Index(i), Value::Sequence(seq)) => seq.get(*i)?,
            _ => return None,
        };
    }
    Some(current)
}
/// Returns the byte column of the `-` indicator that precedes `value_start`
/// on the same line, or `None` when `locate_preceding_dash` finds none.
fn column_of_preceding_dash(source: &str, value_start: usize) -> Option<usize> {
    locate_preceding_dash(source, value_start).map(|dash_pos| {
        // The line starts right after the closest earlier newline, or at 0.
        let line_start = source.as_bytes()[..dash_pos]
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |newline| newline + 1);
        dash_pos - line_start
    })
}
/// Estimates the indentation step of `source` as the smallest increase in
/// leading spaces between two consecutive considered lines.
///
/// Blank lines, comment lines and lines whose leading whitespace contains a
/// tab are not considered. Returns 2 when no increase is observed.
fn detect_indent_unit(source: &str) -> usize {
    let mut previous: Option<usize> = None;
    let mut smallest: Option<usize> = None;
    for line in source.lines() {
        let spaces = line.bytes().take_while(|&b| b == b' ').count();
        let rest = &line[spaces..];
        let skipped = rest.starts_with('\t') || rest.is_empty() || rest.starts_with('#');
        if skipped {
            continue;
        }
        if let Some(prev) = previous {
            if spaces > prev {
                let step = spaces - prev;
                smallest = Some(smallest.map_or(step, |current| current.min(step)));
            }
        }
        previous = Some(spaces);
    }
    smallest.unwrap_or(2)
}
/// Determines the column (count of leading spaces) of the key that owns the
/// value starting at byte `value_start`.
///
/// If other content precedes the value on its own line, that line's
/// indentation is the answer. Otherwise the value sits on a line of its own
/// and earlier lines are scanned upwards for the closest non-blank,
/// non-comment line indented less than the value's line; if none exists the
/// value line's own indentation is returned. Returns `None` only when
/// `value_start` lies beyond the end of the source.
fn column_of_key_at(source: &str, value_start: usize) -> Option<usize> {
let bytes = source.as_bytes();
if value_start > bytes.len() {
return None;
}
// Start offset of the line containing `pos`.
let line_start = |pos: usize| -> usize {
let mut s = pos;
while s > 0 && bytes[s - 1] != b'\n' {
s -= 1;
}
s
};
// Number of space bytes (tabs not counted) beginning at `start`.
let leading_spaces = |start: usize| -> usize {
let mut c = 0;
while start + c < bytes.len() && bytes[start + c] == b' ' {
c += 1;
}
c
};
let value_line_start = line_start(value_start);
let value_col = leading_spaces(value_line_start);
// Look for any non-blank byte between the line's indentation and the value.
let mut probe = value_line_start + value_col;
let mut inline_content = false;
while probe < value_start {
let b = bytes[probe];
if b != b' ' && b != b'\t' {
inline_content = true;
break;
}
probe += 1;
}
if inline_content {
return Some(value_col);
}
// The value is on the first line: there is no earlier line to inspect.
if value_line_start == 0 {
return Some(value_col);
}
// `cursor` is the index of the newline that ends the line being examined.
let mut cursor = value_line_start - 1; loop {
let mut prev_start = cursor;
while prev_start > 0 && bytes[prev_start - 1] != b'\n' {
prev_start -= 1;
}
let prev_col = leading_spaces(prev_start);
let first_content = prev_start + prev_col;
let after_content = cursor; let is_blank = first_content >= after_content;
let is_comment = !is_blank && bytes[first_content] == b'#';
// First shallower content line found while scanning upwards wins.
if !is_blank && !is_comment && prev_col < value_col {
return Some(prev_col);
}
if prev_start == 0 {
return Some(value_col);
}
cursor = prev_start - 1;
}
}
/// Counts single- and double-quoted scalar tokens below `root` and returns
/// the more frequent style. Single-quoted wins ties; with no quoted scalar at
/// all the result is `Plain`.
fn detect_dominant_quote_style(root: &GreenNode) -> crate::ScalarStyle {
    let (mut single, mut double) = (0_usize, 0_usize);
    walk_tokens(root, 0, &mut |kind, _| {
        if kind == SyntaxKind::SingleQuotedScalar {
            single += 1;
        } else if kind == SyntaxKind::DoubleQuotedScalar {
            double += 1;
        }
    });
    match (single, double) {
        (0, 0) => crate::ScalarStyle::Plain,
        (s, d) if s >= d => crate::ScalarStyle::SingleQuoted,
        _ => crate::ScalarStyle::DoubleQuoted,
    }
}
/// Counts block and flow collection nodes below (and including) `root`.
/// Returns `Auto` only when flow collections strictly outnumber block ones,
/// otherwise `Block`.
fn detect_dominant_flow_style(root: &GreenNode) -> crate::FlowStyle {
    let (mut block, mut flow) = (0_usize, 0_usize);
    walk_collections(root, &mut |kind| {
        if matches!(kind, SyntaxKind::BlockMapping | SyntaxKind::BlockSequence) {
            block += 1;
        } else if matches!(kind, SyntaxKind::FlowMapping | SyntaxKind::FlowSequence) {
            flow += 1;
        }
    });
    if block >= flow {
        crate::FlowStyle::Block
    } else {
        crate::FlowStyle::Auto
    }
}
/// Calls `visit` with the kind of `node` and then, depth-first, with the kind
/// of every node below it. Tokens are not visited.
fn walk_collections(node: &GreenNode, visit: &mut dyn FnMut(SyntaxKind)) {
    visit(node.kind());
    for child in node.children() {
        match child {
            GreenChild::Node(inner) => walk_collections(inner, visit),
            GreenChild::Token { .. } => {}
        }
    }
}
/// Returns the offset just past the first `\n` at or after `pos`, or the
/// source length when no newline follows. A `pos` beyond the source is
/// returned unchanged.
fn end_of_line(source: &str, pos: usize) -> usize {
    let bytes = source.as_bytes();
    let newline_offset = bytes
        .get(pos..)
        .and_then(|tail| tail.iter().position(|&b| b == b'\n'));
    match newline_offset {
        Some(offset) => pos + offset + 1,
        None => pos.max(bytes.len()),
    }
}
/// Scans backwards from `value_start` over spaces and tabs and returns the
/// offset of a `-` found immediately before them. Any other byte (including a
/// line break) or the start of the source yields `None`.
fn locate_preceding_dash(source: &str, value_start: usize) -> Option<usize> {
    let before = &source.as_bytes()[..value_start];
    let idx = before.iter().rposition(|&b| b != b' ' && b != b'\t')?;
    if before[idx] == b'-' {
        Some(idx)
    } else {
        None
    }
}
fn require_single_line(source: &str, start: usize, end: usize) -> Result<()> {
let segment = &source.as_bytes()[start..end];
if segment.contains(&b'\n') {
return Err(Error::Parse(
"remove: multi-line / nested-value entries are not yet supported".into(),
));
}
Ok(())
}
fn line_extent(source: &str, start: usize, end: usize) -> (usize, usize) {
let bytes = source.as_bytes();
let mut s = start;
while s > 0 && bytes[s - 1] != b'\n' {
s -= 1;
}
let mut e = end;
while e < bytes.len() && bytes[e] != b'\n' {
e += 1;
}
if e < bytes.len() {
e += 1;
}
(s, e)
}
/// Returns the kind of the token that covers offset `target`, where `target`
/// is relative to the start of `node`. Returns `None` when no child covers it.
fn leaf_kind_at(node: &GreenNode, target: usize) -> Option<SyntaxKind> {
    let mut offset = 0;
    for child in node.children() {
        let len = child.text_len();
        if (offset..offset + len).contains(&target) {
            return match child {
                GreenChild::Token { kind, .. } => Some(*kind),
                // Descend with the offset rebased onto the child.
                GreenChild::Node(inner) => leaf_kind_at(inner, target - offset),
            };
        }
        offset += len;
    }
    None
}
/// Finds the block mapping and entry enclosing offset `target` and returns the
/// quoting kind that dominates the values of the other entries, if any.
fn sibling_dominant_scalar_kind(node: &GreenNode, target: usize) -> Option<SyntaxKind> {
    enclosing_mapping_and_entry(node, target, 0)
        .and_then(|(mapping, entry)| dominant_sibling_value_kind(mapping, entry))
}
/// Finds the innermost block mapping and the innermost mapping entry on the
/// path from `node` down to the token covering absolute offset `target`.
///
/// `base` is the absolute offset of `node`. Returns `None` when no token
/// covers `target` or when the path passes through no block mapping or no
/// mapping entry.
fn enclosing_mapping_and_entry(
node: &GreenNode,
target: usize,
base: usize,
) -> Option<(&GreenNode, &GreenNode)> {
// Recursive descent carrying the most recent mapping/entry seen so far.
fn walk<'a>(
node: &'a GreenNode,
target: usize,
base: usize,
cur_mapping: Option<&'a GreenNode>,
cur_entry: Option<&'a GreenNode>,
) -> Option<(&'a GreenNode, &'a GreenNode)> {
let mut pos = base;
for child in node.children() {
let len = child.text_len();
if pos <= target && target < pos + len {
match child {
// Reached the covering token: report what was collected on the way.
GreenChild::Token { .. } => {
if let (Some(m), Some(e)) = (cur_mapping, cur_entry) {
return Some((m, e));
}
return None;
}
GreenChild::Node(inner) => {
// Only block mappings are tracked as the enclosing mapping.
let new_mapping = if inner.kind() == SyntaxKind::BlockMapping {
Some(inner)
} else {
cur_mapping
};
let new_entry = if inner.kind() == SyntaxKind::MappingEntry {
Some(inner)
} else {
cur_entry
};
if let Some(found) = walk(inner, target, pos, new_mapping, new_entry) {
return Some(found);
}
}
}
}
pos += len;
}
None
}
walk(node, target, base, None, None)
}
/// Tallies the scalar style of every entry value in `mapping` except the
/// entry `exclude` (compared by address) and returns the quoted style that
/// dominates.
///
/// A quoted style dominates when it is used at least twice and strictly more
/// often than both other styles; otherwise `None` is returned.
fn dominant_sibling_value_kind(mapping: &GreenNode, exclude: &GreenNode) -> Option<SyntaxKind> {
    let (mut plain, mut single, mut double) = (0usize, 0usize, 0usize);
    for child in mapping.children() {
        let entry = match child {
            GreenChild::Node(node) if node.kind() == SyntaxKind::MappingEntry => node,
            _ => continue,
        };
        if core::ptr::eq(entry, exclude) {
            continue;
        }
        match entry_value_scalar_kind(entry) {
            Some(SyntaxKind::PlainScalar) => plain += 1,
            Some(SyntaxKind::SingleQuotedScalar) => single += 1,
            Some(SyntaxKind::DoubleQuotedScalar) => double += 1,
            _ => {}
        }
    }
    let dominates = |count: usize, a: usize, b: usize| count >= 2 && count > a && count > b;
    if dominates(single, double, plain) {
        Some(SyntaxKind::SingleQuotedScalar)
    } else if dominates(double, single, plain) {
        Some(SyntaxKind::DoubleQuotedScalar)
    } else {
        None
    }
}
/// Returns the kind of the first scalar token after the colon of `entry`.
///
/// Yields `None` when a child node appears after the colon before any scalar
/// token, or when the entry has no scalar value token at all.
fn entry_value_scalar_kind(entry: &GreenNode) -> Option<SyntaxKind> {
    let mut seen_colon = false;
    for child in entry.children() {
        match child {
            GreenChild::Token { kind, .. } if *kind == SyntaxKind::ColonIndicator => {
                seen_colon = true;
            }
            GreenChild::Token { kind, .. }
                if seen_colon
                    && matches!(
                        kind,
                        SyntaxKind::PlainScalar
                            | SyntaxKind::SingleQuotedScalar
                            | SyntaxKind::DoubleQuotedScalar
                            | SyntaxKind::LiteralScalar
                            | SyntaxKind::FoldedScalar
                    ) =>
            {
                return Some(*kind);
            }
            GreenChild::Node(_) if seen_colon => return None,
            _ => {}
        }
    }
    None
}
/// Description of the place a scalar is written to, used to pick its style.
struct SiteContext {
/// Kind of the leaf token currently at the site.
kind: SyntaxKind,
/// Dominant quoted style among sibling values, when one was detected.
neighbour: Option<SyntaxKind>,
/// Count of leading spaces/tabs of the line holding the site.
entry_col: usize,
}
fn format_value_for_site(value: &Value, ctx: &SiteContext) -> Result<String> {
match value {
Value::Null => Ok("null".to_string()),
Value::Bool(true) => Ok("true".to_string()),
Value::Bool(false) => Ok("false".to_string()),
Value::Number(n) => Ok(format_number(n)),
Value::String(s) => format_string_for_site(s, ctx),
Value::Sequence(_) | Value::Mapping(_) => Err(Error::Parse(
"set_value cannot replace a scalar with a collection (use `set` with a fragment)"
.into(),
)),
Value::Tagged(t) => format_value_for_site(t.value(), ctx),
}
}
/// Renders a number using its `to_string` representation.
fn format_number(n: &Number) -> String {
n.to_string()
}
/// Reports whether `s` can be written between single quotes without changing
/// its value: single-quoted scalars have no escapes, so line breaks and other
/// control characters (tab excepted) cannot be represented.
fn is_single_quote_safe(s: &str) -> bool {
    !s.chars().any(|c| c.is_control() && c != '\t')
}

/// Renders the string `s` as source text for the site described by `ctx`.
///
/// * Multi-line strings that qualify become a block literal at any scalar site.
/// * At a plain site the dominant sibling quoting is followed when possible,
///   otherwise the string stays plain if that is safe and is double-quoted
///   if not.
/// * At a quoted site the existing quoting is kept; single quotes are only
///   used when the content is representable in them, otherwise the string is
///   double-quoted so that it reads back unchanged.
/// * At a block-scalar site a single-line string becomes plain or
///   double-quoted.
///
/// # Errors
/// Returns `Error::Parse` when a block scalar would have to hold content that
/// cannot be written as a block literal, or when the site is not a scalar.
fn format_string_for_site(s: &str, ctx: &SiteContext) -> Result<String> {
    if s.contains('\n') && can_use_block_literal(s) && is_block_site(ctx.kind) {
        return Ok(format_block_literal(s, ctx.entry_col));
    }
    let plain_or_double = |text: &str| {
        if is_plain_safe(text) {
            text.to_string()
        } else {
            format_double_quoted(text)
        }
    };
    match ctx.kind {
        SyntaxKind::PlainScalar => match ctx.neighbour {
            Some(SyntaxKind::SingleQuotedScalar) if is_single_quote_safe(s) => {
                Ok(format_single_quoted(s))
            }
            Some(SyntaxKind::DoubleQuotedScalar) => Ok(format_double_quoted(s)),
            _ => Ok(plain_or_double(s)),
        },
        SyntaxKind::SingleQuotedScalar => {
            // A raw line break inside single quotes is folded when read back,
            // and control characters cannot be written at all, so such
            // content has to switch to double quotes.
            if is_single_quote_safe(s) {
                Ok(format_single_quoted(s))
            } else {
                Ok(format_double_quoted(s))
            }
        }
        SyntaxKind::DoubleQuotedScalar => Ok(format_double_quoted(s)),
        SyntaxKind::LiteralScalar | SyntaxKind::FoldedScalar => {
            if !s.contains('\n') {
                Ok(plain_or_double(s))
            } else if can_use_block_literal(s) {
                Ok(format_block_literal(s, ctx.entry_col))
            } else {
                Err(Error::Parse(
                    "set_value: existing block scalar can only be replaced with a string \
                     whose content lines do not begin with whitespace or control characters yet"
                        .into(),
                ))
            }
        }
        _ => Err(Error::Parse(
            "set_value: target site is not a scalar leaf".into(),
        )),
    }
}
/// Reports whether a block literal may be written at a site of this kind:
/// any of the five scalar token kinds.
fn is_block_site(kind: SyntaxKind) -> bool {
    match kind {
        SyntaxKind::PlainScalar
        | SyntaxKind::SingleQuotedScalar
        | SyntaxKind::DoubleQuotedScalar
        | SyntaxKind::LiteralScalar
        | SyntaxKind::FoldedScalar => true,
        _ => false,
    }
}
/// Reports whether `s` can be written as a `|` block literal by
/// `format_block_literal` and read back unchanged.
///
/// Rejected are: the empty string, content consisting of nothing but a single
/// line break, control characters other than `\n` and tab, more than one
/// trailing line break, and any line that starts with a space or tab.
fn can_use_block_literal(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    let has_forbidden_byte = s
        .bytes()
        .any(|b| (b < 0x20 && b != b'\n' && b != b'\t') || b == 0x7F);
    if has_forbidden_byte {
        return false;
    }
    let trimmed = s.strip_suffix('\n').unwrap_or(s);
    // A literal holding only empty lines is chomped down to the empty string
    // when read back, so a lone "\n" cannot be represented this way.
    if trimmed.is_empty() {
        return false;
    }
    // With one line break already stripped, another one means the string ends
    // in several line breaks, which the emitted header does not preserve.
    if trimmed.ends_with('\n') {
        return false;
    }
    // Leading whitespace on a line would be confused with indentation.
    trimmed
        .split('\n')
        .all(|line| !line.starts_with(' ') && !line.starts_with('\t'))
}
/// Writes `s` as a `|` block literal whose lines are indented two columns
/// deeper than `entry_col`.
///
/// The header is `|` when `s` ends with a line break and `|-` otherwise. The
/// output ends with the last content line, without a trailing line break.
fn format_block_literal(s: &str, entry_col: usize) -> String {
    let (body, header) = match s.strip_suffix('\n') {
        Some(stripped) => (stripped, "|"),
        None => (s, "|-"),
    };
    let indent = " ".repeat(entry_col + 2);
    let line_count = body.matches('\n').count() + 1;
    let mut out = String::with_capacity(s.len() + 8 + indent.len() * line_count);
    out.push_str(header);
    for line in body.split('\n') {
        out.push('\n');
        out.push_str(&indent);
        out.push_str(line);
    }
    out
}
/// Counts the leading spaces and tabs of the line that contains byte `pos`
/// (clamped to the end of the source).
fn entry_indent_column(source: &str, pos: usize) -> usize {
    let bytes = source.as_bytes();
    let anchor = pos.min(bytes.len());
    let line_start = bytes[..anchor]
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    bytes[line_start..]
        .iter()
        .take_while(|&&b| b == b' ' || b == b'\t')
        .count()
}
/// Reports whether `s` can be written as an unquoted (plain) scalar and still
/// read back as the same string.
///
/// Rejected are: the empty string; null/boolean keywords in their common
/// spellings; anything that resolves to a number; a first byte that is an
/// indicator, quote or blank; a trailing blank; a trailing `:` (which is read
/// as a mapping indicator at end of line); control characters; and the
/// sequences `: ` and ` #` anywhere inside.
///
/// NOTE(review): flow indicators (`,`, `[`, `]`, `{`, `}`) inside the string
/// are accepted; confirm this is acceptable for sites inside flow collections.
fn is_plain_safe(s: &str) -> bool {
    let bytes = s.as_bytes();
    let (first, last) = match (bytes.first(), bytes.last()) {
        (Some(&first), Some(&last)) => (first, last),
        _ => return false,
    };
    if matches!(
        s,
        "null"
            | "Null"
            | "NULL"
            | "~"
            | "true"
            | "True"
            | "TRUE"
            | "false"
            | "False"
            | "FALSE"
            | "yes"
            | "Yes"
            | "YES"
            | "no"
            | "No"
            | "NO"
            | "on"
            | "On"
            | "ON"
            | "off"
            | "Off"
            | "OFF"
    ) {
        return false;
    }
    if looks_like_number(s) {
        return false;
    }
    if matches!(
        first,
        b'-' | b'?'
            | b':'
            | b','
            | b'['
            | b']'
            | b'{'
            | b'}'
            | b'#'
            | b'&'
            | b'*'
            | b'!'
            | b'|'
            | b'>'
            | b'\''
            | b'"'
            | b'%'
            | b'@'
            | b'`'
            | b' '
            | b'\t'
    ) {
        return false;
    }
    // Trailing blanks would be lost, and a trailing `:` would be followed by
    // the line break and therefore be taken for a mapping indicator.
    if matches!(last, b' ' | b'\t' | b':') {
        return false;
    }
    let mut prev: u8 = 0;
    for &b in bytes {
        if b < 0x20 || b == 0x7F {
            return false;
        }
        // `: ` starts a mapping value, ` #` starts a comment.
        if (b == b' ' && prev == b':') || (b == b'#' && prev == b' ') {
            return false;
        }
        prev = b;
    }
    true
}
/// Reports whether the plain scalar `s` would resolve to an integer or float.
///
/// Only strings starting with a digit, `-`, `+` or `.` are handed to the
/// resolver; everything else is rejected up front.
fn looks_like_number(s: &str) -> bool {
    let plausible = match s.chars().next() {
        Some(first) => matches!(first, '-' | '+' | '.') || first.is_ascii_digit(),
        None => false,
    };
    if !plausible {
        return false;
    }
    match crate::streaming::resolve_plain_ext(s, false, false, false, false, false) {
        crate::streaming::Scalar::Int(_) | crate::streaming::Scalar::Float(_) => true,
        _ => false,
    }
}
/// Wraps `s` in single quotes, doubling every `'` inside it.
fn format_single_quoted(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for (index, piece) in s.split('\'').enumerate() {
        // Every split point stands for one quote of the input.
        if index > 0 {
            quoted.push_str("''");
        }
        quoted.push_str(piece);
    }
    quoted.push('\'');
    quoted
}
/// Wraps `s` in double quotes, escaping everything that may not appear raw.
///
/// `"` and `\` are backslash-escaped; line feed, carriage return, tab,
/// backspace and form feed use their short escapes; every other control
/// character (C0, DEL and C1) is written as `\uXXXX`. All other characters
/// are copied unchanged.
fn format_double_quoted(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for ch in s.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\x08' => out.push_str("\\b"),
            '\x0c' => out.push_str("\\f"),
            // Control characters are at most U+009F, so four hex digits
            // always suffice. Writing to a `String` cannot fail.
            c if c.is_control() => {
                let _ = write!(&mut out, "\\u{:04X}", c as u32);
            }
            c => out.push(c),
        }
    }
    out.push('"');
    out
}