Skip to main content

yaml_edit/
lib.rs

1#![deny(missing_docs)]
2#![allow(clippy::type_complexity)]
3#![warn(clippy::unnecessary_to_owned)]
4#![warn(clippy::redundant_clone)]
5#![warn(clippy::inefficient_to_string)]
6#![warn(clippy::manual_string_new)]
7#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
8
9//! A lossless YAML parser and editor.
10//!
11//! This library provides a lossless parser for YAML files, preserving
12//! all whitespace, comments, and formatting. It is based on the [rowan] library.
13//!
14//! # Mutability Model
15//!
16//! **Important:** This library uses interior mutability through the rowan library.
17//! This means methods taking `&self` can still modify the underlying syntax tree.
18//!
19//! ## What This Means
20//!
21//! - Types like [`Mapping`], [`Sequence`], and [`Document`] can mutate even from `&self`
22//! - Changes are immediately visible to all holders of the syntax tree
23//! - You don't need to mark variables as `mut` to call mutation methods
24//!
25//! ## Example
26//!
27//! ```rust
28//! use yaml_edit::Document;
29//! use std::str::FromStr;
30//!
31//! let doc = Document::from_str("name: Alice").unwrap();  // Note: not `mut`
32//! let mapping = doc.as_mapping().unwrap();  // Note: not `mut`
33//!
34//! // Yet we can still mutate!
35//! mapping.set("age", 30);  // This works despite `mapping` not being `mut`
36//!
37//! assert_eq!(doc.to_string(), "name: Alice\nage: 30\n");
38//! ```
39//!
40//! ## Why This Design?
41//!
42//! This design enables:
43//! - **Efficient in-place mutations** without cloning the entire tree
44//! - **Sharing references** while still allowing modifications
45//! - **Lossless preservation** of formatting and comments during edits
46//!
//! If you're familiar with `Rc<RefCell<T>>`, this is similar - the tree uses
//! interior mutability (not thread synchronization) to allow shared mutable access.
49//!
50//! ## Migration Note
51//!
52//! If you're coming from other YAML libraries, this might seem unusual. In most
53//! libraries, you need `&mut` to modify data. Here, you don't. This is intentional
54//! and allows for a more flexible API while maintaining the guarantees of Rust's
55//! borrow checker.
56//!
57//! # Getting Started
58//!
59//! ## Parsing YAML
60//!
61//! ```rust
62//! use yaml_edit::Document;
63//! use std::str::FromStr;
64//!
65//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
66//! let mapping = yaml.as_mapping().unwrap();
67//!
68//! // Get values
69//! let name = mapping.get("name").unwrap();
70//! assert_eq!(name.as_scalar().unwrap().to_string(), "Alice");
71//! ```
72//!
73//! ## Modifying YAML
74//!
75//! ```rust
76//! use yaml_edit::Document;
77//! use std::str::FromStr;
78//!
79//! let yaml = Document::from_str("name: Alice").unwrap();
80//! let mapping = yaml.as_mapping().unwrap();
81//!
82//! // Add a new field
83//! mapping.set("age", 30);
84//!
85//! // Update an existing field
86//! mapping.set("name", "Bob");
87//!
88//! // Remove a field
89//! mapping.remove("age");
90//! ```
91//!
92//! ## Path-based Access
93//!
94//! ```rust
95//! use yaml_edit::{Document, path::YamlPath};
96//! use std::str::FromStr;
97//!
98//! let yaml = Document::from_str("server:\n  host: localhost").unwrap();
99//!
100//! // Get nested values
101//! let host = yaml.get_path("server.host");
102//! assert!(host.is_some());
103//!
104//! // Set nested values (creates intermediate mappings)
105//! yaml.set_path("server.port", 8080);
106//! yaml.set_path("database.host", "db.example.com");
107//! ```
108//!
109//! ## Iterating Over Collections
110//!
111//! ```rust
112//! use yaml_edit::Document;
113//! use std::str::FromStr;
114//!
115//! let yaml = Document::from_str("a: 1\nb: 2\nc: 3").unwrap();
116//! let mapping = yaml.as_mapping().unwrap();
117//!
118//! // Iterate over key-value pairs
119//! for (key, value) in &mapping {
120//!     println!("{:?}: {:?}", key, value);
121//! }
122//!
123//! // Use iterator methods
124//! let count = (&mapping).into_iter().count();
125//! assert_eq!(count, 3);
126//! ```
127//!
128//! ## Working with Sequences
129//!
130//! ```rust
131//! use yaml_edit::Document;
132//! use std::str::FromStr;
133//!
134//! let yaml = Document::from_str("items:\n  - apple\n  - banana").unwrap();
135//! let mapping = yaml.as_mapping().unwrap();
136//! let sequence = mapping.get_sequence("items").unwrap();
137//!
138//! // Iterate over items
139//! for item in &sequence {
140//!     println!("{:?}", item);
141//! }
142//!
143//! // Get specific item
144//! let first = sequence.get(0);
145//! assert!(first.is_some());
146//! ```
147//!
148//! ## Schema Validation
149//!
150//! ```rust
151//! use yaml_edit::{Document, SchemaValidator};
152//! use std::str::FromStr;
153//!
154//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
155//!
156//! // Validate against JSON schema (no custom types)
157//! let result = SchemaValidator::json().validate(&yaml);
158//! assert!(result.is_ok());
159//! ```
160//!
161//! ## Position Tracking
162//!
163//! ```rust
164//! use yaml_edit::Document;
165//! use std::str::FromStr;
166//!
167//! let text = "name: Alice\nage: 30";
168//! let doc = Document::from_str(text).unwrap();
169//!
170//! // Get line/column positions
171//! let start = doc.start_position(text);
172//! assert_eq!(start.line, 1);
173//! assert_eq!(start.column, 1);
174//! ```
175
176pub mod anchor_resolution;
177#[doc(inline)]
178pub use anchor_resolution::{
179    AnchorRegistry, DocumentMergedExt, DocumentResolvedExt, MappingMergedExt, MergedMapping,
180    MergedView,
181};
182pub mod mapping_view;
183#[doc(inline)]
184pub use mapping_view::MappingView;
185mod as_yaml;
186mod builder;
187pub mod custom_tags;
188pub mod debug;
189mod error;
190pub mod error_recovery;
191mod lex;
192mod nodes;
193mod parse;
194pub mod path;
195mod scalar;
196mod schema;
197pub mod validator;
198mod value;
199pub mod visitor;
200mod yaml;
201
202pub use as_yaml::{yaml_eq, AsYaml, YamlKind, YamlNode};
203pub use builder::{MappingBuilder, SequenceBuilder, YamlBuilder};
204pub use error::{YamlError, YamlResult};
205pub use lex::{
206    lex, lex_with_validation, lex_with_validation_config, SyntaxKind, ValidationConfig,
207    WhitespaceError, WhitespaceErrorCategory,
208};
209pub use parse::Parse;
210pub use scalar::{ScalarStyle, ScalarType, ScalarValue};
211pub use schema::{
212    CustomSchema, CustomValidationResult, Schema, SchemaValidator, ValidationError,
213    ValidationErrorKind, ValidationResult,
214};
215pub use yaml::{
216    Alias, Directive, Document, Lang, Mapping, MappingEntry, Scalar, ScalarConversionError,
217    Sequence, Set, TaggedNode, YamlFile,
218};
219
220/// Advanced API for power users who need direct access to the underlying syntax tree.
221///
222/// This module provides low-level access to the rowan syntax tree implementation.
223/// Most users should not need this module - the main API provides high-level
224/// wrappers that are easier to use and don't expose implementation details.
225///
226/// # Example
227///
228/// ```rust
229/// use yaml_edit::{Document, advanced};
230/// use std::str::FromStr;
231///
232/// let doc = Document::from_str("key: value").unwrap();
233/// let mapping = doc.as_mapping().unwrap();
234///
235/// // Get a value node
236/// if let Some(value) = mapping.get("key") {
237///     // YamlNode provides access to the underlying structure
238///     println!("Found value: {}", value.to_string());
239/// }
240/// ```
241pub mod advanced {
242    pub use rowan::TextRange;
243
244    use crate::yaml::SyntaxNode;
245    use crate::TextPosition;
246
247    /// Get the text range of a syntax node
248    pub fn syntax_node_range(node: &SyntaxNode) -> TextRange {
249        node.text_range()
250    }
251
252    /// Convert a TextPosition to rowan's TextRange
253    pub fn text_position_to_range(pos: TextPosition) -> TextRange {
254        pos.into()
255    }
256
257    /// Convert rowan's TextRange to TextPosition
258    pub fn text_range_to_position(range: TextRange) -> TextPosition {
259        range.into()
260    }
261}
262
263// Re-export custom tags API
264pub use custom_tags::{
265    // Built-in handlers
266    CompressedBinaryHandler,
267    CustomTagError,
268    CustomTagHandler,
269    CustomTagParser,
270    CustomTagRegistry,
271    EnvVarHandler,
272    JsonHandler,
273    TimestampHandler,
274};
275
/// A text position in a YAML document, represented as byte offsets.
///
/// The range is half-open: `start` is included and `end` is excluded. The
/// fields are public and [`TextPosition::new`] performs no validation, so a
/// value with `start > end` can exist; such an inverted range is treated as
/// empty by [`TextPosition::len`] and [`TextPosition::is_empty`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextPosition {
    /// The start byte offset
    pub start: u32,
    /// The end byte offset (exclusive)
    pub end: u32,
}

impl TextPosition {
    /// Create a new text position from a start and an (exclusive) end byte offset.
    ///
    /// No ordering check is performed on the two offsets.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Get the length of this text range in bytes.
    ///
    /// Returns `0` for an inverted range (`start > end`) instead of
    /// overflowing the subtraction.
    pub fn len(&self) -> u32 {
        // Saturate so that an inverted range can neither panic (debug builds)
        // nor wrap around to a huge length (release builds).
        self.end.saturating_sub(self.start)
    }

    /// Check if this range is empty, i.e. covers no bytes.
    ///
    /// Always agrees with `self.len() == 0`, including for inverted ranges.
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }
}
301
/// A human-oriented location inside a YAML document.
///
/// Both coordinates start counting at 1: the first line is line 1 and the
/// first column of every line is column 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineColumn {
    /// Line number (1-indexed)
    pub line: usize,
    /// Column number (1-indexed, counts Unicode scalar values)
    pub column: usize,
}

impl LineColumn {
    /// Build a position from an explicit line and column.
    pub fn new(line: usize, column: usize) -> Self {
        LineColumn { line, column }
    }
}

/// Formats the position as `line:column`, e.g. `42:17`.
impl std::fmt::Display for LineColumn {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let LineColumn { line, column } = *self;
        write!(f, "{}:{}", line, column)
    }
}
325
326/// Convert a byte offset to line and column numbers in the given text.
327///
328/// Line and column numbers are 1-indexed. Column numbers count Unicode scalar values,
329/// not bytes or grapheme clusters.
330///
331/// # Arguments
332///
333/// * `text` - The full source text
334/// * `byte_offset` - Byte offset into the text
335///
336/// # Returns
337///
338/// `LineColumn` with 1-indexed line and column numbers, or line 1, column 1 if offset is out of bounds.
339///
340/// # Examples
341///
342/// ```
343/// use yaml_edit::byte_offset_to_line_column;
344///
345/// let text = "line 1\nline 2\nline 3";
346/// let pos = byte_offset_to_line_column(text, 7); // Start of "line 2"
347/// assert_eq!(pos.line, 2);
348/// assert_eq!(pos.column, 1);
349/// ```
350pub fn byte_offset_to_line_column(text: &str, byte_offset: usize) -> LineColumn {
351    let mut line = 1;
352    let mut column = 1;
353
354    for (i, ch) in text.char_indices() {
355        if i >= byte_offset {
356            break;
357        }
358
359        if ch == '\n' {
360            line += 1;
361            column = 1;
362        } else {
363            column += 1;
364        }
365    }
366
367    LineColumn { line, column }
368}
369
370impl From<rowan::TextRange> for TextPosition {
371    fn from(range: rowan::TextRange) -> Self {
372        Self {
373            start: u32::from(range.start()),
374            end: u32::from(range.end()),
375        }
376    }
377}
378
379impl From<TextPosition> for rowan::TextRange {
380    fn from(pos: TextPosition) -> Self {
381        rowan::TextRange::new(pos.start.into(), pos.end.into())
382    }
383}
384
/// Structured classification of a parse error.
///
/// Lets callers `match` on the category of a failure instead of inspecting
/// the human-readable error message.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// A flow sequence was opened but its closing `]` is missing
    UnclosedFlowSequence,
    /// A flow mapping was opened but its closing `}` is missing
    UnclosedFlowMapping,
    /// A quoted string has no closing quote
    UnterminatedString,
    /// A parse error that does not fall into any of the categories above
    Other,
}
397
398/// A positioned parse error containing location information.
399#[derive(Debug, Clone, PartialEq, Eq, Hash)]
400pub struct PositionedParseError {
401    /// The error message
402    pub message: String,
403    /// The text range where the error occurred
404    pub range: TextPosition,
405    /// Optional error code for categorization
406    pub code: Option<String>,
407    /// Structured error kind
408    pub kind: ParseErrorKind,
409}
410
411impl PositionedParseError {
412    /// Get the line and column where this error starts (if source text is available).
413    ///
414    /// # Arguments
415    ///
416    /// * `source_text` - The original YAML source text
417    ///
418    /// # Returns
419    ///
420    /// `LineColumn` with 1-indexed line and column numbers.
421    ///
422    /// # Examples
423    ///
424    /// ```
425    /// use yaml_edit::{YamlFile, Parse};
426    /// use std::str::FromStr;
427    ///
428    /// let text = "invalid:\n  - [unclosed";
429    /// let parse = Parse::parse_yaml(text);
430    ///
431    /// if let Some(err) = parse.positioned_errors().first() {
432    ///     let pos = err.start_position(text);
433    ///     assert_eq!(pos.line, 2);
434    /// }
435    /// ```
436    pub fn start_position(&self, source_text: &str) -> LineColumn {
437        byte_offset_to_line_column(source_text, self.range.start as usize)
438    }
439
440    /// Get the line and column where this error ends (if source text is available).
441    ///
442    /// # Arguments
443    ///
444    /// * `source_text` - The original YAML source text
445    ///
446    /// # Returns
447    ///
448    /// `LineColumn` with 1-indexed line and column numbers.
449    pub fn end_position(&self, source_text: &str) -> LineColumn {
450        byte_offset_to_line_column(source_text, self.range.end as usize)
451    }
452}
453
454impl std::fmt::Display for PositionedParseError {
455    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
456        write!(f, "{}", self.message)
457    }
458}
459
460impl std::error::Error for PositionedParseError {}
461
/// How nested content is indented when a YAML file is written.
///
/// The default is two spaces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Indentation {
    /// Use the same indentation as the original line for the value.
    FieldNameLength,

    /// Indent by a fixed number of spaces.
    Spaces(u32),
}

impl Default for Indentation {
    /// Returns `Indentation::Spaces(2)`.
    fn default() -> Self {
        Self::Spaces(2)
    }
}
477
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    /// ASCII-only text: offsets map directly onto columns.
    #[test]
    fn test_byte_offset_to_line_column_basic() {
        let text = "line 1\nline 2\nline 3";

        // (byte offset, expected line, expected column)
        let cases = [(0, 1, 1), (7, 2, 1), (10, 2, 4), (14, 3, 1)];
        for &(offset, line, column) in cases.iter() {
            assert_eq!(
                byte_offset_to_line_column(text, offset),
                LineColumn::new(line, column)
            );
        }
    }

    /// Multi-byte characters advance the column by one each.
    #[test]
    fn test_byte_offset_to_line_column_unicode() {
        let text = "hello\n世界\nworld";

        assert_eq!(byte_offset_to_line_column(text, 0), LineColumn::new(1, 1));
        assert_eq!(byte_offset_to_line_column(text, 6), LineColumn::new(2, 1));

        // After first Chinese character "世" (3 bytes)
        assert_eq!(byte_offset_to_line_column(text, 9), LineColumn::new(2, 2));
    }

    #[test]
    fn test_line_column_display() {
        assert_eq!(LineColumn::new(42, 17).to_string(), "42:17");
        assert_eq!(LineColumn::new(1, 1).to_string(), "1:1");
    }

    #[test]
    fn test_document_position() {
        let text = "name: Alice\nage: 30";
        let doc = Document::from_str(text).unwrap();

        let start = doc.start_position(text);
        assert_eq!((start.line, start.column), (1, 1));

        let range = doc.byte_range();
        assert_eq!(range.start, 0);
        assert!(range.end > 0);
    }

    #[test]
    fn test_mapping_position() {
        let text = "server:\n  host: localhost\n  port: 8080";
        let doc = Document::from_str(text).unwrap();
        let mapping = doc.as_mapping().unwrap();

        let start = mapping.start_position(text);
        assert_eq!((start.line, start.column), (1, 1));

        // The nested mapping begins on the line after its key.
        let server_mapping = mapping.get_mapping("server").unwrap();
        assert_eq!(server_mapping.start_position(text).line, 2);
    }

    #[test]
    fn test_scalar_position_via_nodes() {
        let text = "name: Alice\nage: 30";
        let doc = Document::from_str(text).unwrap();
        let mapping = doc.as_mapping().unwrap();

        let entries: Vec<_> = mapping.entries().collect();
        assert!(entries.len() >= 2);

        let first_entry = &entries[0];

        let key_text = first_entry.key_node().unwrap().to_string();
        assert_eq!(key_text.trim(), "name");

        let value_text = first_entry.value_node().unwrap().to_string();
        assert_eq!(value_text.trim(), "Alice");
    }

    #[test]
    fn test_sequence_position() {
        let text = "items:\n  - apple\n  - banana";
        let doc = Document::from_str(text).unwrap();
        let mapping = doc.as_mapping().unwrap();

        let items_node = mapping.get("items").unwrap();
        assert!(items_node.as_sequence().is_some());
    }

    #[test]
    fn test_positioned_parse_error() {
        let text = "invalid:\n  - [unclosed";
        let parse = Parse::parse_yaml(text);

        // Nothing to check when the parser reports no positioned errors.
        let errors = parse.positioned_errors();
        if let Some(err) = errors.first() {
            assert_eq!(err.start_position(text).line, 2);
        }
    }

    #[test]
    fn test_multiline_document_byte_offsets() {
        let text = "# Comment\nname: Alice\n\nage: 30";
        let doc = Document::from_str(text).unwrap();

        let range = doc.byte_range();
        assert_eq!((range.start, range.end), (10, 30));

        let start = doc.start_position(text);
        assert_eq!((start.line, start.column), (2, 1));
    }

    #[test]
    fn test_nested_mapping_byte_ranges() {
        let text = "server:\n  database:\n    host: localhost";
        let doc = Document::from_str(text).unwrap();
        let mapping = doc.as_mapping().unwrap();

        let server_mapping = mapping.get_mapping("server").unwrap();

        let server_range = server_mapping.byte_range();
        assert!(server_range.start < server_range.end);

        let server_pos = server_mapping.start_position(text);
        assert!(server_pos.line >= 1);
    }

    /// Consecutive blank lines each count as a line of their own.
    #[test]
    fn test_empty_lines_positions() {
        let text = "a: 1\n\n\nb: 2";

        assert_eq!(byte_offset_to_line_column(text, 0).line, 1);
        assert_eq!(byte_offset_to_line_column(text, 7).line, 4);
    }

    #[test]
    fn test_document_end_position() {
        let text = "key: value";
        let doc = Document::from_str(text).unwrap();

        let start = doc.start_position(text);
        let end = doc.end_position(text);

        assert_eq!(start.line, 1);
        assert!(start.column <= end.column);
    }

    #[test]
    fn test_mapping_end_position() {
        let text = "a: 1\nb: 2";
        let doc = Document::from_str(text).unwrap();
        let mapping = doc.as_mapping().unwrap();

        assert_eq!(mapping.start_position(text).line, 1);
        assert_eq!(mapping.end_position(text).line, 2);
    }
}