yaml_edit/lib.rs
1#![deny(missing_docs)]
2#![allow(clippy::type_complexity)]
3#![warn(clippy::unnecessary_to_owned)]
4#![warn(clippy::redundant_clone)]
5#![warn(clippy::inefficient_to_string)]
6#![warn(clippy::manual_string_new)]
7#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
8
9//! A lossless YAML parser and editor.
10//!
11//! This library provides a lossless parser for YAML files, preserving
12//! all whitespace, comments, and formatting. It is based on the [rowan] library.
13//!
14//! # Mutability Model
15//!
16//! **Important:** This library uses interior mutability through the rowan library.
17//! This means methods taking `&self` can still modify the underlying syntax tree.
18//!
19//! ## What This Means
20//!
//! - Types like [`Mapping`], [`Sequence`], and [`Document`] can be mutated even through `&self`
22//! - Changes are immediately visible to all holders of the syntax tree
23//! - You don't need to mark variables as `mut` to call mutation methods
24//!
25//! ## Example
26//!
27//! ```rust
28//! use yaml_edit::Document;
29//! use std::str::FromStr;
30//!
31//! let doc = Document::from_str("name: Alice").unwrap(); // Note: not `mut`
32//! let mapping = doc.as_mapping().unwrap(); // Note: not `mut`
33//!
34//! // Yet we can still mutate!
35//! mapping.set("age", 30); // This works despite `mapping` not being `mut`
36//!
37//! assert_eq!(doc.to_string(), "name: Alice\nage: 30\n");
38//! ```
39//!
40//! ## Why This Design?
41//!
42//! This design enables:
43//! - **Efficient in-place mutations** without cloning the entire tree
44//! - **Sharing references** while still allowing modifications
45//! - **Lossless preservation** of formatting and comments during edits
46//!
//! If you're familiar with `RefCell` or `Rc`, this is similar - the tree uses
//! interior mutability to allow shared mutable access.
49//!
50//! ## Migration Note
51//!
52//! If you're coming from other YAML libraries, this might seem unusual. In most
53//! libraries, you need `&mut` to modify data. Here, you don't. This is intentional
54//! and allows for a more flexible API while maintaining the guarantees of Rust's
55//! borrow checker.
56//!
57//! # Getting Started
58//!
59//! ## Parsing YAML
60//!
61//! ```rust
62//! use yaml_edit::Document;
63//! use std::str::FromStr;
64//!
65//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
66//! let mapping = yaml.as_mapping().unwrap();
67//!
68//! // Get values
69//! let name = mapping.get("name").unwrap();
70//! assert_eq!(name.as_scalar().unwrap().to_string(), "Alice");
71//! ```
72//!
73//! ## Modifying YAML
74//!
75//! ```rust
76//! use yaml_edit::Document;
77//! use std::str::FromStr;
78//!
79//! let yaml = Document::from_str("name: Alice").unwrap();
80//! let mapping = yaml.as_mapping().unwrap();
81//!
82//! // Add a new field
83//! mapping.set("age", 30);
84//!
85//! // Update an existing field
86//! mapping.set("name", "Bob");
87//!
88//! // Remove a field
89//! mapping.remove("age");
90//! ```
91//!
92//! ## Path-based Access
93//!
94//! ```rust
95//! use yaml_edit::{Document, path::YamlPath};
96//! use std::str::FromStr;
97//!
98//! let yaml = Document::from_str("server:\n host: localhost").unwrap();
99//!
100//! // Get nested values
101//! let host = yaml.get_path("server.host");
102//! assert!(host.is_some());
103//!
104//! // Set nested values (creates intermediate mappings)
105//! yaml.set_path("server.port", 8080);
106//! yaml.set_path("database.host", "db.example.com");
107//! ```
108//!
109//! ## Iterating Over Collections
110//!
111//! ```rust
112//! use yaml_edit::Document;
113//! use std::str::FromStr;
114//!
115//! let yaml = Document::from_str("a: 1\nb: 2\nc: 3").unwrap();
116//! let mapping = yaml.as_mapping().unwrap();
117//!
118//! // Iterate over key-value pairs
119//! for (key, value) in &mapping {
120//! println!("{:?}: {:?}", key, value);
121//! }
122//!
123//! // Use iterator methods
124//! let count = (&mapping).into_iter().count();
125//! assert_eq!(count, 3);
126//! ```
127//!
128//! ## Working with Sequences
129//!
130//! ```rust
131//! use yaml_edit::Document;
132//! use std::str::FromStr;
133//!
134//! let yaml = Document::from_str("items:\n - apple\n - banana").unwrap();
135//! let mapping = yaml.as_mapping().unwrap();
136//! let sequence = mapping.get_sequence("items").unwrap();
137//!
138//! // Iterate over items
139//! for item in &sequence {
140//! println!("{:?}", item);
141//! }
142//!
143//! // Get specific item
144//! let first = sequence.get(0);
145//! assert!(first.is_some());
146//! ```
147//!
148//! ## Schema Validation
149//!
150//! ```rust
151//! use yaml_edit::{Document, SchemaValidator};
152//! use std::str::FromStr;
153//!
154//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
155//!
156//! // Validate against JSON schema (no custom types)
157//! let result = SchemaValidator::json().validate(&yaml);
158//! assert!(result.is_ok());
159//! ```
160//!
161//! ## Position Tracking
162//!
163//! ```rust
164//! use yaml_edit::Document;
165//! use std::str::FromStr;
166//!
167//! let text = "name: Alice\nage: 30";
168//! let doc = Document::from_str(text).unwrap();
169//!
170//! // Get line/column positions
171//! let start = doc.start_position(text);
172//! assert_eq!(start.line, 1);
173//! assert_eq!(start.column, 1);
174//! ```
175
176pub mod anchor_resolution;
177mod as_yaml;
178mod builder;
179pub mod custom_tags;
180pub mod debug;
181mod error;
182pub mod error_recovery;
183mod lex;
184mod nodes;
185mod parse;
186pub mod path;
187mod scalar;
188mod schema;
189pub mod validator;
190mod value;
191pub mod visitor;
192mod yaml;
193
194pub use as_yaml::{yaml_eq, AsYaml, YamlKind, YamlNode};
195pub use builder::{MappingBuilder, SequenceBuilder, YamlBuilder};
196pub use error::{YamlError, YamlResult};
197pub use lex::{
198 lex, lex_with_validation, lex_with_validation_config, SyntaxKind, ValidationConfig,
199 WhitespaceError, WhitespaceErrorCategory,
200};
201pub use parse::Parse;
202pub use scalar::{ScalarStyle, ScalarType, ScalarValue};
203pub use schema::{
204 CustomSchema, CustomValidationResult, Schema, SchemaValidator, ValidationError,
205 ValidationErrorKind, ValidationResult,
206};
207pub use yaml::{
208 Alias, Directive, Document, Lang, Mapping, MappingEntry, Scalar, ScalarConversionError,
209 Sequence, Set, TaggedNode, YamlFile,
210};
211
212/// Advanced API for power users who need direct access to the underlying syntax tree.
213///
214/// This module provides low-level access to the rowan syntax tree implementation.
215/// Most users should not need this module - the main API provides high-level
216/// wrappers that are easier to use and don't expose implementation details.
217///
218/// # Example
219///
220/// ```rust
221/// use yaml_edit::{Document, advanced};
222/// use std::str::FromStr;
223///
224/// let doc = Document::from_str("key: value").unwrap();
225/// let mapping = doc.as_mapping().unwrap();
226///
227/// // Get a value node
228/// if let Some(value) = mapping.get("key") {
229/// // YamlNode provides access to the underlying structure
230/// println!("Found value: {}", value.to_string());
231/// }
232/// ```
233pub mod advanced {
234 pub use rowan::TextRange;
235
236 use crate::yaml::SyntaxNode;
237 use crate::TextPosition;
238
239 /// Get the text range of a syntax node
240 pub fn syntax_node_range(node: &SyntaxNode) -> TextRange {
241 node.text_range()
242 }
243
244 /// Convert a TextPosition to rowan's TextRange
245 pub fn text_position_to_range(pos: TextPosition) -> TextRange {
246 pos.into()
247 }
248
249 /// Convert rowan's TextRange to TextPosition
250 pub fn text_range_to_position(range: TextRange) -> TextPosition {
251 range.into()
252 }
253}
254
255// Re-export custom tags API
256pub use custom_tags::{
257 // Built-in handlers
258 CompressedBinaryHandler,
259 CustomTagError,
260 CustomTagHandler,
261 CustomTagParser,
262 CustomTagRegistry,
263 EnvVarHandler,
264 JsonHandler,
265 TimestampHandler,
266};
267
/// A text position in a YAML document, represented as byte offsets.
///
/// The span covers the bytes from `start` up to, but not including, `end`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextPosition {
    /// The start byte offset
    pub start: u32,
    /// The end byte offset (exclusive)
    pub end: u32,
}

impl TextPosition {
    /// Create a new text position spanning `start..end`.
    ///
    /// The offsets are stored as given; no check is made that `start <= end`.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Get the length of this text range in bytes.
    ///
    /// Because the fields are public and [`TextPosition::new`] does not validate
    /// its arguments, `end` can be smaller than `start`. Such an inverted range
    /// is reported as length `0` rather than underflowing (a plain subtraction
    /// would panic in debug builds and wrap around in release builds).
    pub fn len(&self) -> u32 {
        self.end.saturating_sub(self.start)
    }

    /// Check if this range is empty, i.e. whether [`TextPosition::len`] is `0`.
    ///
    /// An inverted range (`end < start`) covers no bytes and is therefore
    /// considered empty as well.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
293
/// A position in a YAML document expressed as a line and a column.
///
/// Both numbers are 1-indexed: the first line is line 1 and the first column
/// of a line is column 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineColumn {
    /// Line number (1-indexed)
    pub line: usize,
    /// Column number (1-indexed, counts Unicode scalar values)
    pub column: usize,
}

impl LineColumn {
    /// Build a position from the given `line` and `column`.
    ///
    /// The values are stored as given; callers are expected to pass 1-indexed numbers.
    pub fn new(line: usize, column: usize) -> Self {
        LineColumn { line, column }
    }
}

/// Renders the position as `line:column`, for example `42:17`.
impl std::fmt::Display for LineColumn {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let LineColumn { line, column } = *self;
        write!(f, "{}:{}", line, column)
    }
}
317
318/// Convert a byte offset to line and column numbers in the given text.
319///
320/// Line and column numbers are 1-indexed. Column numbers count Unicode scalar values,
321/// not bytes or grapheme clusters.
322///
323/// # Arguments
324///
325/// * `text` - The full source text
326/// * `byte_offset` - Byte offset into the text
327///
328/// # Returns
329///
330/// `LineColumn` with 1-indexed line and column numbers, or line 1, column 1 if offset is out of bounds.
331///
332/// # Examples
333///
334/// ```
335/// use yaml_edit::byte_offset_to_line_column;
336///
337/// let text = "line 1\nline 2\nline 3";
338/// let pos = byte_offset_to_line_column(text, 7); // Start of "line 2"
339/// assert_eq!(pos.line, 2);
340/// assert_eq!(pos.column, 1);
341/// ```
342pub fn byte_offset_to_line_column(text: &str, byte_offset: usize) -> LineColumn {
343 let mut line = 1;
344 let mut column = 1;
345
346 for (i, ch) in text.char_indices() {
347 if i >= byte_offset {
348 break;
349 }
350
351 if ch == '\n' {
352 line += 1;
353 column = 1;
354 } else {
355 column += 1;
356 }
357 }
358
359 LineColumn { line, column }
360}
361
362impl From<rowan::TextRange> for TextPosition {
363 fn from(range: rowan::TextRange) -> Self {
364 Self {
365 start: u32::from(range.start()),
366 end: u32::from(range.end()),
367 }
368 }
369}
370
371impl From<TextPosition> for rowan::TextRange {
372 fn from(pos: TextPosition) -> Self {
373 rowan::TextRange::new(pos.start.into(), pos.end.into())
374 }
375}
376
/// Structured classification of a parse error.
///
/// Lets callers match on the category of an error directly instead of
/// inspecting the human-readable message.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// A flow sequence was opened but never closed (missing `]`)
    UnclosedFlowSequence,
    /// A flow mapping was opened but never closed (missing `}`)
    UnclosedFlowMapping,
    /// A quoted string is missing its closing quote
    UnterminatedString,
    /// Any parse error not covered by the other variants
    Other,
}
389
390/// A positioned parse error containing location information.
391#[derive(Debug, Clone, PartialEq, Eq, Hash)]
392pub struct PositionedParseError {
393 /// The error message
394 pub message: String,
395 /// The text range where the error occurred
396 pub range: TextPosition,
397 /// Optional error code for categorization
398 pub code: Option<String>,
399 /// Structured error kind
400 pub kind: ParseErrorKind,
401}
402
403impl PositionedParseError {
404 /// Get the line and column where this error starts (if source text is available).
405 ///
406 /// # Arguments
407 ///
408 /// * `source_text` - The original YAML source text
409 ///
410 /// # Returns
411 ///
412 /// `LineColumn` with 1-indexed line and column numbers.
413 ///
414 /// # Examples
415 ///
416 /// ```
417 /// use yaml_edit::{YamlFile, Parse};
418 /// use std::str::FromStr;
419 ///
420 /// let text = "invalid:\n - [unclosed";
421 /// let parse = Parse::parse_yaml(text);
422 ///
423 /// if let Some(err) = parse.positioned_errors().first() {
424 /// let pos = err.start_position(text);
425 /// assert_eq!(pos.line, 2);
426 /// }
427 /// ```
428 pub fn start_position(&self, source_text: &str) -> LineColumn {
429 byte_offset_to_line_column(source_text, self.range.start as usize)
430 }
431
432 /// Get the line and column where this error ends (if source text is available).
433 ///
434 /// # Arguments
435 ///
436 /// * `source_text` - The original YAML source text
437 ///
438 /// # Returns
439 ///
440 /// `LineColumn` with 1-indexed line and column numbers.
441 pub fn end_position(&self, source_text: &str) -> LineColumn {
442 byte_offset_to_line_column(source_text, self.range.end as usize)
443 }
444}
445
446impl std::fmt::Display for PositionedParseError {
447 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
448 write!(f, "{}", self.message)
449 }
450}
451
452impl std::error::Error for PositionedParseError {}
453
/// The indentation to use when writing a YAML file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Indentation {
    /// Use the same indentation as the original line for the value.
    FieldNameLength,

    /// The number of spaces to use for indentation.
    Spaces(u32),
}

/// The default indentation is two spaces.
impl Default for Indentation {
    fn default() -> Self {
        Self::Spaces(2)
    }
}
469
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// Resolve `offset` in `text` and return it as a `(line, column)` pair.
    fn line_col(text: &str, offset: usize) -> (usize, usize) {
        let pos = byte_offset_to_line_column(text, offset);
        (pos.line, pos.column)
    }

    #[test]
    fn test_byte_offset_to_line_column_basic() {
        let text = "line 1\nline 2\nline 3";

        assert_eq!(line_col(text, 0), (1, 1));
        assert_eq!(line_col(text, 7), (2, 1));
        assert_eq!(line_col(text, 10), (2, 4));
        assert_eq!(line_col(text, 14), (3, 1));
    }

    #[test]
    fn test_byte_offset_to_line_column_unicode() {
        let text = "hello\n世界\nworld";

        assert_eq!(line_col(text, 0), (1, 1));
        assert_eq!(line_col(text, 6), (2, 1));

        // After first Chinese character "世" (3 bytes)
        assert_eq!(line_col(text, 9), (2, 2));
    }

    #[test]
    fn test_line_column_display() {
        assert_eq!(LineColumn::new(42, 17).to_string(), "42:17");
        assert_eq!(LineColumn::new(1, 1).to_string(), "1:1");
    }

    #[test]
    fn test_document_position() {
        let text = "name: Alice\nage: 30";
        let doc = Document::from_str(text).unwrap();

        let start = doc.start_position(text);
        assert_eq!((start.line, start.column), (1, 1));

        let range = doc.byte_range();
        assert_eq!(range.start, 0);
        assert!(range.end > 0);
    }

    #[test]
    fn test_mapping_position() {
        let text = "server:\n  host: localhost\n  port: 8080";
        let doc = Document::from_str(text).unwrap();
        let root = doc.as_mapping().unwrap();

        // The root mapping begins at the very start of the text.
        let root_start = root.start_position(text);
        assert_eq!((root_start.line, root_start.column), (1, 1));

        // The nested mapping begins on the line after its key.
        let nested = root.get_mapping("server").unwrap();
        assert_eq!(nested.start_position(text).line, 2);
    }

    #[test]
    fn test_scalar_position_via_nodes() {
        let text = "name: Alice\nage: 30";
        let doc = Document::from_str(text).unwrap();
        let root = doc.as_mapping().unwrap();

        let entries: Vec<_> = root.entries().collect();
        assert!(entries.len() >= 2);

        let first = &entries[0];

        let key = first.key_node().unwrap();
        assert_eq!(key.to_string().trim(), "name");

        let value = first.value_node().unwrap();
        assert_eq!(value.to_string().trim(), "Alice");
    }

    #[test]
    fn test_sequence_position() {
        let text = "items:\n  - apple\n  - banana";
        let doc = Document::from_str(text).unwrap();
        let root = doc.as_mapping().unwrap();

        let items = root.get("items").unwrap();
        assert!(items.as_sequence().is_some());
    }

    #[test]
    fn test_positioned_parse_error() {
        let text = "invalid:\n  - [unclosed";
        let parse = Parse::parse_yaml(text);

        // Only check the position when the parser actually reported an error.
        if let Some(err) = parse.positioned_errors().first() {
            assert_eq!(err.start_position(text).line, 2);
        }
    }

    #[test]
    fn test_multiline_document_byte_offsets() {
        let text = "# Comment\nname: Alice\n\nage: 30";
        let doc = Document::from_str(text).unwrap();

        let range = doc.byte_range();
        assert_eq!(range.start, 10);
        assert_eq!(range.end, 30);

        let start = doc.start_position(text);
        assert_eq!((start.line, start.column), (2, 1));
    }

    #[test]
    fn test_nested_mapping_byte_ranges() {
        let text = "server:\n  database:\n    host: localhost";
        let doc = Document::from_str(text).unwrap();
        let root = doc.as_mapping().unwrap();

        let nested = root.get_mapping("server").unwrap();

        let nested_range = nested.byte_range();
        assert!(nested_range.end > nested_range.start);

        assert!(nested.start_position(text).line > 0);
    }

    #[test]
    fn test_empty_lines_positions() {
        let text = "a: 1\n\n\nb: 2";

        assert_eq!(byte_offset_to_line_column(text, 0).line, 1);
        assert_eq!(byte_offset_to_line_column(text, 7).line, 4);
    }

    #[test]
    fn test_document_end_position() {
        let text = "key: value";
        let doc = Document::from_str(text).unwrap();

        let (start, end) = (doc.start_position(text), doc.end_position(text));

        assert_eq!(start.line, 1);
        assert!(end.column >= start.column);
    }

    #[test]
    fn test_mapping_end_position() {
        let text = "a: 1\nb: 2";
        let doc = Document::from_str(text).unwrap();
        let root = doc.as_mapping().unwrap();

        let (start, end) = (root.start_position(text), root.end_position(text));

        assert_eq!(start.line, 1);
        assert_eq!(end.line, 2);
    }
}