Skip to main content

yaml_edit/
lib.rs

1#![deny(missing_docs)]
2#![allow(clippy::type_complexity)]
3#![warn(clippy::unnecessary_to_owned)]
4#![warn(clippy::redundant_clone)]
5#![warn(clippy::inefficient_to_string)]
6#![warn(clippy::manual_string_new)]
7#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/README.md"))]
8
9//! A lossless YAML parser and editor.
10//!
11//! This library provides a lossless parser for YAML files, preserving
12//! all whitespace, comments, and formatting. It is based on the [rowan] library.
13//!
14//! # Mutability Model
15//!
16//! **Important:** This library uses interior mutability through the rowan library.
17//! This means methods taking `&self` can still modify the underlying syntax tree.
18//!
19//! ## What This Means
20//!
21//! - Types like [`Mapping`], [`Sequence`], and [`Document`] can mutate even from `&self`
22//! - Changes are immediately visible to all holders of the syntax tree
23//! - You don't need to mark variables as `mut` to call mutation methods
24//!
25//! ## Example
26//!
27//! ```rust
28//! use yaml_edit::Document;
29//! use std::str::FromStr;
30//!
31//! let doc = Document::from_str("name: Alice").unwrap();  // Note: not `mut`
32//! let mapping = doc.as_mapping().unwrap();  // Note: not `mut`
33//!
34//! // Yet we can still mutate!
35//! mapping.set("age", 30);  // This works despite `mapping` not being `mut`
36//!
37//! assert_eq!(doc.to_string(), "name: Alice\nage: 30\n");
38//! ```
39//!
40//! ## Why This Design?
41//!
42//! This design enables:
43//! - **Efficient in-place mutations** without cloning the entire tree
44//! - **Sharing references** while still allowing modifications
45//! - **Lossless preservation** of formatting and comments during edits
46//!
//! If you're familiar with `RefCell` or `Rc`, this is similar - the tree uses
//! interior mutability (not thread synchronization) to allow shared mutable access.
49//!
50//! ## Migration Note
51//!
52//! If you're coming from other YAML libraries, this might seem unusual. In most
53//! libraries, you need `&mut` to modify data. Here, you don't. This is intentional
54//! and allows for a more flexible API while maintaining the guarantees of Rust's
55//! borrow checker.
56//!
57//! # Getting Started
58//!
59//! ## Parsing YAML
60//!
61//! ```rust
62//! use yaml_edit::Document;
63//! use std::str::FromStr;
64//!
65//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
66//! let mapping = yaml.as_mapping().unwrap();
67//!
68//! // Get values
69//! let name = mapping.get("name").unwrap();
70//! assert_eq!(name.as_scalar().unwrap().to_string(), "Alice");
71//! ```
72//!
73//! ## Modifying YAML
74//!
75//! ```rust
76//! use yaml_edit::Document;
77//! use std::str::FromStr;
78//!
79//! let yaml = Document::from_str("name: Alice").unwrap();
80//! let mapping = yaml.as_mapping().unwrap();
81//!
82//! // Add a new field
83//! mapping.set("age", 30);
84//!
85//! // Update an existing field
86//! mapping.set("name", "Bob");
87//!
88//! // Remove a field
89//! mapping.remove("age");
90//! ```
91//!
92//! ## Path-based Access
93//!
94//! ```rust
95//! use yaml_edit::{Document, path::YamlPath};
96//! use std::str::FromStr;
97//!
98//! let yaml = Document::from_str("server:\n  host: localhost").unwrap();
99//!
100//! // Get nested values
101//! let host = yaml.get_path("server.host");
102//! assert!(host.is_some());
103//!
104//! // Set nested values (creates intermediate mappings)
105//! yaml.set_path("server.port", 8080);
106//! yaml.set_path("database.host", "db.example.com");
107//! ```
108//!
109//! ## Iterating Over Collections
110//!
111//! ```rust
112//! use yaml_edit::Document;
113//! use std::str::FromStr;
114//!
115//! let yaml = Document::from_str("a: 1\nb: 2\nc: 3").unwrap();
116//! let mapping = yaml.as_mapping().unwrap();
117//!
118//! // Iterate over key-value pairs
119//! for (key, value) in &mapping {
120//!     println!("{:?}: {:?}", key, value);
121//! }
122//!
123//! // Use iterator methods
124//! let count = (&mapping).into_iter().count();
125//! assert_eq!(count, 3);
126//! ```
127//!
128//! ## Working with Sequences
129//!
130//! ```rust
131//! use yaml_edit::Document;
132//! use std::str::FromStr;
133//!
134//! let yaml = Document::from_str("items:\n  - apple\n  - banana").unwrap();
135//! let mapping = yaml.as_mapping().unwrap();
136//! let sequence = mapping.get_sequence("items").unwrap();
137//!
138//! // Iterate over items
139//! for item in &sequence {
140//!     println!("{:?}", item);
141//! }
142//!
143//! // Get specific item
144//! let first = sequence.get(0);
145//! assert!(first.is_some());
146//! ```
147//!
148//! ## Schema Validation
149//!
150//! ```rust
151//! use yaml_edit::{Document, SchemaValidator};
152//! use std::str::FromStr;
153//!
154//! let yaml = Document::from_str("name: Alice\nage: 30").unwrap();
155//!
156//! // Validate against JSON schema (no custom types)
157//! let result = SchemaValidator::json().validate(&yaml);
158//! assert!(result.is_ok());
159//! ```
160//!
161//! ## Position Tracking
162//!
163//! ```rust
164//! use yaml_edit::Document;
165//! use std::str::FromStr;
166//!
167//! let text = "name: Alice\nage: 30";
168//! let doc = Document::from_str(text).unwrap();
169//!
170//! // Get line/column positions
171//! let start = doc.start_position(text);
172//! assert_eq!(start.line, 1);
173//! assert_eq!(start.column, 1);
174//! ```
175
176pub mod anchor_resolution;
177mod as_yaml;
178mod builder;
179pub mod custom_tags;
180pub mod debug;
181mod error;
182pub mod error_recovery;
183mod lex;
184mod nodes;
185mod parse;
186pub mod path;
187mod scalar;
188mod schema;
189pub mod validator;
190mod value;
191pub mod visitor;
192mod yaml;
193
194pub use as_yaml::{yaml_eq, AsYaml, YamlKind, YamlNode};
195pub use builder::{MappingBuilder, SequenceBuilder, YamlBuilder};
196pub use error::{YamlError, YamlResult};
197pub use lex::{
198    lex, lex_with_validation, lex_with_validation_config, SyntaxKind, ValidationConfig,
199    WhitespaceError, WhitespaceErrorCategory,
200};
201pub use parse::Parse;
202pub use scalar::{ScalarStyle, ScalarType, ScalarValue};
203pub use schema::{
204    CustomSchema, CustomValidationResult, Schema, SchemaValidator, ValidationError,
205    ValidationErrorKind, ValidationResult,
206};
207pub use yaml::{
208    Alias, Directive, Document, Lang, Mapping, MappingEntry, Scalar, ScalarConversionError,
209    Sequence, Set, TaggedNode, YamlFile,
210};
211
212/// Advanced API for power users who need direct access to the underlying syntax tree.
213///
214/// This module provides low-level access to the rowan syntax tree implementation.
215/// Most users should not need this module - the main API provides high-level
216/// wrappers that are easier to use and don't expose implementation details.
217///
218/// # Example
219///
220/// ```rust
221/// use yaml_edit::{Document, advanced};
222/// use std::str::FromStr;
223///
224/// let doc = Document::from_str("key: value").unwrap();
225/// let mapping = doc.as_mapping().unwrap();
226///
227/// // Get a value node
228/// if let Some(value) = mapping.get("key") {
229///     // YamlNode provides access to the underlying structure
230///     println!("Found value: {}", value.to_string());
231/// }
232/// ```
233pub mod advanced {
234    pub use rowan::TextRange;
235
236    use crate::yaml::SyntaxNode;
237    use crate::TextPosition;
238
239    /// Get the text range of a syntax node
240    pub fn syntax_node_range(node: &SyntaxNode) -> TextRange {
241        node.text_range()
242    }
243
244    /// Convert a TextPosition to rowan's TextRange
245    pub fn text_position_to_range(pos: TextPosition) -> TextRange {
246        pos.into()
247    }
248
249    /// Convert rowan's TextRange to TextPosition
250    pub fn text_range_to_position(range: TextRange) -> TextPosition {
251        range.into()
252    }
253}
254
255// Re-export custom tags API
256pub use custom_tags::{
257    // Built-in handlers
258    CompressedBinaryHandler,
259    CustomTagError,
260    CustomTagHandler,
261    CustomTagParser,
262    CustomTagRegistry,
263    EnvVarHandler,
264    JsonHandler,
265    TimestampHandler,
266};
267
/// A text position in a YAML document, represented as byte offsets.
///
/// The span is half-open: it covers the bytes from `start` up to, but not including,
/// `end`. Both fields are public and [`TextPosition::new`] performs no validation, so a
/// value with `end < start` can exist; [`TextPosition::len`] and
/// [`TextPosition::is_empty`] treat such a value as an empty span.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextPosition {
    /// The start byte offset
    pub start: u32,
    /// The end byte offset (exclusive)
    pub end: u32,
}

impl TextPosition {
    /// Create a new text position covering the bytes `start..end`.
    ///
    /// No validation is performed; the arguments are stored as given.
    pub fn new(start: u32, end: u32) -> Self {
        Self { start, end }
    }

    /// Get the length of this text range in bytes.
    ///
    /// Returns `0` when `end <= start`, so an inverted range never panics or wraps.
    pub fn len(&self) -> u32 {
        // Saturate rather than subtract directly: the fields are unvalidated, and a plain
        // `end - start` would overflow when `end < start`.
        self.end.saturating_sub(self.start)
    }

    /// Check if this range is empty, i.e. whether [`TextPosition::len`] is `0`.
    pub fn is_empty(&self) -> bool {
        // `<=` keeps this in agreement with `len()` for inverted ranges.
        self.end <= self.start
    }
}
293
/// A human-oriented location in a YAML document: a line and a column.
///
/// Both numbers are 1-indexed, so the first character of a document is at line 1,
/// column 1.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LineColumn {
    /// Line number (1-indexed)
    pub line: usize,
    /// Column number (1-indexed, counts Unicode scalar values)
    pub column: usize,
}

impl LineColumn {
    /// Build a position from an already 1-indexed `line` and `column`.
    pub fn new(line: usize, column: usize) -> Self {
        LineColumn { line, column }
    }
}

impl std::fmt::Display for LineColumn {
    /// Formats the position as `line:column`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let LineColumn { line, column } = *self;
        write!(f, "{}:{}", line, column)
    }
}
317
318/// Convert a byte offset to line and column numbers in the given text.
319///
320/// Line and column numbers are 1-indexed. Column numbers count Unicode scalar values,
321/// not bytes or grapheme clusters.
322///
323/// # Arguments
324///
325/// * `text` - The full source text
326/// * `byte_offset` - Byte offset into the text
327///
328/// # Returns
329///
330/// `LineColumn` with 1-indexed line and column numbers, or line 1, column 1 if offset is out of bounds.
331///
332/// # Examples
333///
334/// ```
335/// use yaml_edit::byte_offset_to_line_column;
336///
337/// let text = "line 1\nline 2\nline 3";
338/// let pos = byte_offset_to_line_column(text, 7); // Start of "line 2"
339/// assert_eq!(pos.line, 2);
340/// assert_eq!(pos.column, 1);
341/// ```
342pub fn byte_offset_to_line_column(text: &str, byte_offset: usize) -> LineColumn {
343    let mut line = 1;
344    let mut column = 1;
345
346    for (i, ch) in text.char_indices() {
347        if i >= byte_offset {
348            break;
349        }
350
351        if ch == '\n' {
352            line += 1;
353            column = 1;
354        } else {
355            column += 1;
356        }
357    }
358
359    LineColumn { line, column }
360}
361
362impl From<rowan::TextRange> for TextPosition {
363    fn from(range: rowan::TextRange) -> Self {
364        Self {
365            start: u32::from(range.start()),
366            end: u32::from(range.end()),
367        }
368    }
369}
370
371impl From<TextPosition> for rowan::TextRange {
372    fn from(pos: TextPosition) -> Self {
373        rowan::TextRange::new(pos.start.into(), pos.end.into())
374    }
375}
376
/// The kind of parse error, enabling structured matching without string parsing.
///
/// Stored in the `kind` field of [`PositionedParseError`], so callers can `match` on the
/// category of a failure instead of inspecting the error message.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// An unclosed flow sequence (missing `]`)
    UnclosedFlowSequence,
    /// An unclosed flow mapping (missing `}`)
    UnclosedFlowMapping,
    /// An unterminated quoted string (missing closing quote)
    UnterminatedString,
    /// Any other parse error
    Other,
}
389
390/// A positioned parse error containing location information.
391#[derive(Debug, Clone, PartialEq, Eq, Hash)]
392pub struct PositionedParseError {
393    /// The error message
394    pub message: String,
395    /// The text range where the error occurred
396    pub range: TextPosition,
397    /// Optional error code for categorization
398    pub code: Option<String>,
399    /// Structured error kind
400    pub kind: ParseErrorKind,
401}
402
403impl PositionedParseError {
404    /// Get the line and column where this error starts (if source text is available).
405    ///
406    /// # Arguments
407    ///
408    /// * `source_text` - The original YAML source text
409    ///
410    /// # Returns
411    ///
412    /// `LineColumn` with 1-indexed line and column numbers.
413    ///
414    /// # Examples
415    ///
416    /// ```
417    /// use yaml_edit::{YamlFile, Parse};
418    /// use std::str::FromStr;
419    ///
420    /// let text = "invalid:\n  - [unclosed";
421    /// let parse = Parse::parse_yaml(text);
422    ///
423    /// if let Some(err) = parse.positioned_errors().first() {
424    ///     let pos = err.start_position(text);
425    ///     assert_eq!(pos.line, 2);
426    /// }
427    /// ```
428    pub fn start_position(&self, source_text: &str) -> LineColumn {
429        byte_offset_to_line_column(source_text, self.range.start as usize)
430    }
431
432    /// Get the line and column where this error ends (if source text is available).
433    ///
434    /// # Arguments
435    ///
436    /// * `source_text` - The original YAML source text
437    ///
438    /// # Returns
439    ///
440    /// `LineColumn` with 1-indexed line and column numbers.
441    pub fn end_position(&self, source_text: &str) -> LineColumn {
442        byte_offset_to_line_column(source_text, self.range.end as usize)
443    }
444}
445
446impl std::fmt::Display for PositionedParseError {
447    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
448        write!(f, "{}", self.message)
449    }
450}
451
452impl std::error::Error for PositionedParseError {}
453
/// The indentation to use when writing a YAML file.
///
/// The default is two spaces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Indentation {
    /// Use the same indentation as the original line for the value.
    FieldNameLength,

    /// The number of spaces to use for indentation.
    Spaces(u32),
}

impl Default for Indentation {
    /// Two-space indentation.
    fn default() -> Self {
        Self::Spaces(2)
    }
}
469
470#[cfg(test)]
471mod tests {
472    use super::*;
473    use std::str::FromStr;
474
475    #[test]
476    fn test_byte_offset_to_line_column_basic() {
477        let text = "line 1\nline 2\nline 3";
478
479        let pos = byte_offset_to_line_column(text, 0);
480        assert_eq!(pos.line, 1);
481        assert_eq!(pos.column, 1);
482
483        let pos = byte_offset_to_line_column(text, 7);
484        assert_eq!(pos.line, 2);
485        assert_eq!(pos.column, 1);
486
487        let pos = byte_offset_to_line_column(text, 10);
488        assert_eq!(pos.line, 2);
489        assert_eq!(pos.column, 4);
490
491        let pos = byte_offset_to_line_column(text, 14);
492        assert_eq!(pos.line, 3);
493        assert_eq!(pos.column, 1);
494    }
495
496    #[test]
497    fn test_byte_offset_to_line_column_unicode() {
498        let text = "hello\n世界\nworld";
499
500        let pos = byte_offset_to_line_column(text, 0);
501        assert_eq!(pos.line, 1);
502        assert_eq!(pos.column, 1);
503
504        let pos = byte_offset_to_line_column(text, 6);
505        assert_eq!(pos.line, 2);
506        assert_eq!(pos.column, 1);
507
508        // After first Chinese character "世" (3 bytes)
509        let pos = byte_offset_to_line_column(text, 9);
510        assert_eq!(pos.line, 2);
511        assert_eq!(pos.column, 2);
512    }
513
514    #[test]
515    fn test_line_column_display() {
516        let pos = LineColumn::new(42, 17);
517        assert_eq!(format!("{}", pos), "42:17");
518
519        let pos2 = LineColumn::new(1, 1);
520        assert_eq!(format!("{}", pos2), "1:1");
521    }
522
523    #[test]
524    fn test_document_position() {
525        let text = "name: Alice\nage: 30";
526        let doc = Document::from_str(text).unwrap();
527
528        let start = doc.start_position(text);
529        assert_eq!(start.line, 1);
530        assert_eq!(start.column, 1);
531
532        let range = doc.byte_range();
533        assert_eq!(range.start, 0);
534        assert!(range.end > 0);
535    }
536
537    #[test]
538    fn test_mapping_position() {
539        let text = "server:\n  host: localhost\n  port: 8080";
540        let doc = Document::from_str(text).unwrap();
541        let mapping = doc.as_mapping().unwrap();
542
543        let start = mapping.start_position(text);
544        assert_eq!(start.line, 1);
545        assert_eq!(start.column, 1);
546
547        let server_mapping = mapping.get_mapping("server").unwrap();
548        let server_start = server_mapping.start_position(text);
549        assert_eq!(server_start.line, 2);
550    }
551
552    #[test]
553    fn test_scalar_position_via_nodes() {
554        let text = "name: Alice\nage: 30";
555        let doc = Document::from_str(text).unwrap();
556        let mapping = doc.as_mapping().unwrap();
557
558        let entries: Vec<_> = mapping.entries().collect();
559        assert!(entries.len() >= 2);
560
561        let first_entry = &entries[0];
562        let key_node = first_entry.key_node().unwrap();
563        assert_eq!(key_node.to_string().trim(), "name");
564
565        let value_node = first_entry.value_node().unwrap();
566        assert_eq!(value_node.to_string().trim(), "Alice");
567    }
568
569    #[test]
570    fn test_sequence_position() {
571        let text = "items:\n  - apple\n  - banana";
572        let doc = Document::from_str(text).unwrap();
573        let mapping = doc.as_mapping().unwrap();
574
575        let items_node = mapping.get("items").unwrap();
576        assert!(items_node.as_sequence().is_some());
577    }
578
579    #[test]
580    fn test_positioned_parse_error() {
581        let text = "invalid:\n  - [unclosed";
582        let parse = Parse::parse_yaml(text);
583
584        let errors = parse.positioned_errors();
585        if errors.is_empty() {
586            return;
587        }
588
589        let err = &errors[0];
590        let start = err.start_position(text);
591        assert_eq!(start.line, 2);
592    }
593
594    #[test]
595    fn test_multiline_document_byte_offsets() {
596        let text = "# Comment\nname: Alice\n\nage: 30";
597        let doc = Document::from_str(text).unwrap();
598
599        let range = doc.byte_range();
600        assert_eq!(range.start, 10);
601        assert_eq!(range.end, 30);
602
603        let start = doc.start_position(text);
604        assert_eq!(start.line, 2);
605        assert_eq!(start.column, 1);
606    }
607
608    #[test]
609    fn test_nested_mapping_byte_ranges() {
610        let text = "server:\n  database:\n    host: localhost";
611        let doc = Document::from_str(text).unwrap();
612        let mapping = doc.as_mapping().unwrap();
613
614        let server_mapping = mapping.get_mapping("server").unwrap();
615        let server_range = server_mapping.byte_range();
616
617        assert!(server_range.end > server_range.start);
618
619        let server_pos = server_mapping.start_position(text);
620        assert!(server_pos.line > 0);
621    }
622
623    #[test]
624    fn test_empty_lines_positions() {
625        let text = "a: 1\n\n\nb: 2";
626
627        let pos1 = byte_offset_to_line_column(text, 0);
628        assert_eq!(pos1.line, 1);
629
630        let pos2 = byte_offset_to_line_column(text, 7);
631        assert_eq!(pos2.line, 4);
632    }
633
634    #[test]
635    fn test_document_end_position() {
636        let text = "key: value";
637        let doc = Document::from_str(text).unwrap();
638
639        let start = doc.start_position(text);
640        let end = doc.end_position(text);
641
642        assert_eq!(start.line, 1);
643        assert!(end.column >= start.column);
644    }
645
646    #[test]
647    fn test_mapping_end_position() {
648        let text = "a: 1\nb: 2";
649        let doc = Document::from_str(text).unwrap();
650        let mapping = doc.as_mapping().unwrap();
651
652        let start = mapping.start_position(text);
653        let end = mapping.end_position(text);
654
655        assert_eq!(start.line, 1);
656        assert_eq!(end.line, 2);
657    }
658}