rust_yaml/
tag.rs

1//! YAML tag resolution and handling system
2//!
3//! This module implements the full YAML 1.2 tag resolution mechanism,
4//! including support for custom tag handlers and schema validation.
5
6use crate::{Error, Result, Value};
7use std::collections::HashMap;
8use std::fmt;
9
/// The handle portion of a YAML tag, i.e. the part that selects which
/// prefix a tag shorthand expands with.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub enum TagHandle {
    /// The primary handle, written `!`.
    Primary,
    /// The secondary handle, written `!!`.
    Secondary,
    /// A named handle such as `!e!`; the payload is the name between the
    /// two exclamation marks (`"e"` in this example).
    Named(String),
    /// A verbatim tag such as `!<tag:example.com,2024:type>`, which carries
    /// no handle to expand.
    Verbatim,
}
22
23impl fmt::Display for TagHandle {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        match self {
26            Self::Primary => write!(f, "!"),
27            Self::Secondary => write!(f, "!!"),
28            Self::Named(name) => write!(f, "!{}!", name),
29            Self::Verbatim => write!(f, "!<>"),
30        }
31    }
32}
33
34/// A resolved YAML tag
35#[derive(Debug, Clone, PartialEq, Eq, Hash)]
36pub struct Tag {
37    /// The fully resolved tag URI
38    pub uri: String,
39    /// The original tag representation (for round-trip)
40    pub original: String,
41    /// Tag kind for quick identification
42    pub kind: TagKind,
43}
44
/// Classification of a resolved tag URI.
///
/// Every variant except [`TagKind::Custom`] corresponds to one
/// `tag:yaml.org,2002:*` URI; any other URI is carried verbatim in `Custom`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TagKind {
    // --- Core scalar types ---
    /// `tag:yaml.org,2002:null`
    Null,
    /// `tag:yaml.org,2002:bool`
    Bool,
    /// `tag:yaml.org,2002:int`
    Int,
    /// `tag:yaml.org,2002:float`
    Float,
    /// `tag:yaml.org,2002:str`
    Str,

    // --- Collection types ---
    /// `tag:yaml.org,2002:seq`
    Seq,
    /// `tag:yaml.org,2002:map`
    Map,

    // --- Extended types ---
    /// `tag:yaml.org,2002:binary`
    Binary,
    /// `tag:yaml.org,2002:timestamp`
    Timestamp,
    /// `tag:yaml.org,2002:set`
    Set,
    /// `tag:yaml.org,2002:omap`
    Omap,
    /// `tag:yaml.org,2002:pairs`
    Pairs,

    /// Any tag outside the list above; the payload is the full tag URI.
    Custom(String),
}
67
68/// Tag resolution context
69pub struct TagResolver {
70    /// Tag directives (handle -> prefix)
71    directives: HashMap<String, String>,
72    /// Custom tag handlers
73    handlers: HashMap<String, Box<dyn TagHandler>>,
74    /// Default schema
75    schema: Schema,
76}
77
78impl fmt::Debug for TagResolver {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        f.debug_struct("TagResolver")
81            .field("directives", &self.directives)
82            .field("handlers_count", &self.handlers.len())
83            .field("schema", &self.schema)
84            .finish()
85    }
86}
87
88impl TagResolver {
89    /// Create a new tag resolver with default schema
90    pub fn new() -> Self {
91        Self::with_schema(Schema::Core)
92    }
93
94    /// Create a new tag resolver with specific schema
95    pub fn with_schema(schema: Schema) -> Self {
96        let mut resolver = Self {
97            directives: HashMap::new(),
98            handlers: HashMap::new(),
99            schema,
100        };
101
102        // Initialize default tag directives
103        resolver.directives.insert("!".to_string(), "!".to_string());
104        resolver
105            .directives
106            .insert("!!".to_string(), "tag:yaml.org,2002:".to_string());
107
108        resolver
109    }
110
111    /// Add a tag directive
112    pub fn add_directive(&mut self, handle: String, prefix: String) {
113        self.directives.insert(handle, prefix);
114    }
115
116    /// Clear all tag directives
117    pub fn clear_directives(&mut self) {
118        self.directives.clear();
119        // Re-add defaults
120        self.directives.insert("!".to_string(), "!".to_string());
121        self.directives
122            .insert("!!".to_string(), "tag:yaml.org,2002:".to_string());
123    }
124
125    /// Register a custom tag handler
126    pub fn register_handler(&mut self, tag_uri: String, handler: Box<dyn TagHandler>) {
127        self.handlers.insert(tag_uri, handler);
128    }
129
130    /// Resolve a tag string to a full Tag
131    pub fn resolve(&self, tag_str: &str) -> Result<Tag> {
132        let (uri, original) = if tag_str.starts_with("tag:") {
133            // Already a full URI
134            (tag_str.to_string(), tag_str.to_string())
135        } else if tag_str.starts_with("!<") && tag_str.ends_with('>') {
136            // Verbatim tag
137            let uri = tag_str[2..tag_str.len() - 1].to_string();
138            (uri, tag_str.to_string())
139        } else if tag_str.starts_with("!!") {
140            // Secondary handle
141            let suffix = &tag_str[2..];
142            let prefix = self
143                .directives
144                .get("!!")
145                .cloned()
146                .unwrap_or_else(|| "tag:yaml.org,2002:".to_string());
147            (format!("{}{}", prefix, suffix), tag_str.to_string())
148        } else if tag_str.starts_with('!') {
149            // Check for named handle
150            if let Some(end) = tag_str[1..].find('!') {
151                let handle_name = &tag_str[1..end + 1];
152                let handle = format!("!{}!", handle_name);
153                let suffix = &tag_str[end + 2..];
154
155                if let Some(prefix) = self.directives.get(&handle) {
156                    (format!("{}{}", prefix, suffix), tag_str.to_string())
157                } else {
158                    // Unknown named handle, treat as primary
159                    let prefix = self
160                        .directives
161                        .get("!")
162                        .cloned()
163                        .unwrap_or_else(|| "!".to_string());
164                    (format!("{}{}", prefix, &tag_str[1..]), tag_str.to_string())
165                }
166            } else {
167                // Primary handle
168                let suffix = &tag_str[1..];
169                let prefix = self
170                    .directives
171                    .get("!")
172                    .cloned()
173                    .unwrap_or_else(|| "!".to_string());
174                (format!("{}{}", prefix, suffix), tag_str.to_string())
175            }
176        } else {
177            // No tag prefix, use implicit tagging based on schema
178            (
179                self.schema.default_tag_for(tag_str),
180                format!("!{}", tag_str),
181            )
182        };
183
184        let kind = Self::identify_tag_kind(&uri);
185
186        Ok(Tag {
187            uri,
188            original,
189            kind,
190        })
191    }
192
193    /// Identify the kind of tag from its URI
194    fn identify_tag_kind(uri: &str) -> TagKind {
195        match uri {
196            "tag:yaml.org,2002:null" => TagKind::Null,
197            "tag:yaml.org,2002:bool" => TagKind::Bool,
198            "tag:yaml.org,2002:int" => TagKind::Int,
199            "tag:yaml.org,2002:float" => TagKind::Float,
200            "tag:yaml.org,2002:str" => TagKind::Str,
201            "tag:yaml.org,2002:seq" => TagKind::Seq,
202            "tag:yaml.org,2002:map" => TagKind::Map,
203            "tag:yaml.org,2002:binary" => TagKind::Binary,
204            "tag:yaml.org,2002:timestamp" => TagKind::Timestamp,
205            "tag:yaml.org,2002:set" => TagKind::Set,
206            "tag:yaml.org,2002:omap" => TagKind::Omap,
207            "tag:yaml.org,2002:pairs" => TagKind::Pairs,
208            _ => TagKind::Custom(uri.to_string()),
209        }
210    }
211
212    /// Apply a tag to a value
213    pub fn apply_tag(&self, tag: &Tag, value: &str) -> Result<Value> {
214        // Check for custom handler first
215        if let Some(handler) = self.handlers.get(&tag.uri) {
216            return handler.construct(value);
217        }
218
219        // Use built-in tag handling
220        match &tag.kind {
221            TagKind::Null => Ok(Value::Null),
222            TagKind::Bool => self.construct_bool(value),
223            TagKind::Int => self.construct_int(value),
224            TagKind::Float => self.construct_float(value),
225            TagKind::Str => Ok(Value::String(value.to_string())),
226            TagKind::Binary => self.construct_binary(value),
227            TagKind::Timestamp => self.construct_timestamp(value),
228            _ => Ok(Value::String(value.to_string())), // Default to string
229        }
230    }
231
232    /// Construct a boolean from a tagged value
233    fn construct_bool(&self, value: &str) -> Result<Value> {
234        match value.to_lowercase().as_str() {
235            "true" | "yes" | "on" => Ok(Value::Bool(true)),
236            "false" | "no" | "off" => Ok(Value::Bool(false)),
237            _ => Err(Error::Type {
238                expected: "boolean".to_string(),
239                found: format!("'{}'", value),
240                position: crate::Position::start(),
241                context: None,
242            }),
243        }
244    }
245
246    /// Construct an integer from a tagged value
247    fn construct_int(&self, value: &str) -> Result<Value> {
248        // Handle different integer formats
249        let parsed = if value.starts_with("0x") || value.starts_with("0X") {
250            // Hexadecimal
251            i64::from_str_radix(&value[2..], 16)
252        } else if value.starts_with("0o") || value.starts_with("0O") {
253            // Octal
254            i64::from_str_radix(&value[2..], 8)
255        } else if value.starts_with("0b") || value.starts_with("0B") {
256            // Binary
257            i64::from_str_radix(&value[2..], 2)
258        } else {
259            // Decimal (with underscore support)
260            value.replace('_', "").parse::<i64>()
261        };
262
263        parsed.map(Value::Int).map_err(|_| Error::Type {
264            expected: "integer".to_string(),
265            found: format!("'{}'", value),
266            position: crate::Position::start(),
267            context: None,
268        })
269    }
270
271    /// Construct a float from a tagged value
272    fn construct_float(&self, value: &str) -> Result<Value> {
273        match value.to_lowercase().as_str() {
274            ".inf" | "+.inf" => Ok(Value::Float(f64::INFINITY)),
275            "-.inf" => Ok(Value::Float(f64::NEG_INFINITY)),
276            ".nan" => Ok(Value::Float(f64::NAN)),
277            _ => value
278                .replace('_', "")
279                .parse::<f64>()
280                .map(Value::Float)
281                .map_err(|_| Error::Type {
282                    expected: "float".to_string(),
283                    found: format!("'{}'", value),
284                    position: crate::Position::start(),
285                    context: None,
286                }),
287        }
288    }
289
290    /// Construct binary data from a tagged value (base64)
291    fn construct_binary(&self, value: &str) -> Result<Value> {
292        use base64::{engine::general_purpose::STANDARD, Engine as _};
293
294        // Remove whitespace from base64 string
295        let clean = value
296            .chars()
297            .filter(|c| !c.is_whitespace())
298            .collect::<String>();
299
300        match STANDARD.decode(&clean) {
301            Ok(bytes) => {
302                // Try to convert to UTF-8 string, otherwise store as binary marker
303                match String::from_utf8(bytes) {
304                    Ok(s) => Ok(Value::String(s)),
305                    Err(_) => Ok(Value::String(format!(
306                        "[binary data: {} bytes]",
307                        clean.len() / 4 * 3
308                    ))),
309                }
310            }
311            Err(_) => Err(Error::Type {
312                expected: "base64-encoded binary".to_string(),
313                found: format!("invalid base64: '{}'", value),
314                position: crate::Position::start(),
315                context: None,
316            }),
317        }
318    }
319
320    /// Construct a timestamp from a tagged value
321    fn construct_timestamp(&self, value: &str) -> Result<Value> {
322        // For now, just store as tagged string
323        // A full implementation would parse ISO 8601 timestamps
324        Ok(Value::String(format!("timestamp:{}", value)))
325    }
326}
327
328impl Default for TagResolver {
329    fn default() -> Self {
330        Self::new()
331    }
332}
333
/// The YAML schema a resolver operates under; it decides how untagged
/// values are treated.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Schema {
    /// The YAML 1.2 core schema.
    Core,
    /// The JSON schema, a subset of YAML.
    Json,
    /// The minimal failsafe schema.
    Failsafe,
}
344
345impl Schema {
346    /// Get the default tag for untagged values based on schema
347    pub fn default_tag_for(&self, _value: &str) -> String {
348        match self {
349            Self::Core => "tag:yaml.org,2002:str".to_string(),
350            Self::Json => "tag:yaml.org,2002:str".to_string(),
351            Self::Failsafe => "tag:yaml.org,2002:str".to_string(),
352        }
353    }
354
355    /// Check if implicit typing is allowed
356    pub fn allows_implicit_typing(&self) -> bool {
357        match self {
358            Self::Core => true,
359            Self::Json => true,
360            Self::Failsafe => false,
361        }
362    }
363}
364
365/// Trait for custom tag handlers
366pub trait TagHandler: Send + Sync {
367    /// Construct a value from the tagged string
368    fn construct(&self, value: &str) -> Result<Value>;
369
370    /// Represent a value as a string for this tag
371    fn represent(&self, value: &Value) -> Result<String>;
372}
373
/// Sample [`TagHandler`] for a 2-D point written as `x,y`.
///
/// The type is a stateless unit struct.
pub struct PointTagHandler;
376
377impl TagHandler for PointTagHandler {
378    fn construct(&self, value: &str) -> Result<Value> {
379        // Parse "x,y" format
380        let parts: Vec<&str> = value.split(',').collect();
381        if parts.len() != 2 {
382            return Err(Error::Type {
383                expected: "point (x,y)".to_string(),
384                found: value.to_string(),
385                position: crate::Position::start(),
386                context: None,
387            });
388        }
389
390        let x = parts[0].trim().parse::<f64>().map_err(|_| Error::Type {
391            expected: "number".to_string(),
392            found: parts[0].to_string(),
393            position: crate::Position::start(),
394            context: None,
395        })?;
396
397        let y = parts[1].trim().parse::<f64>().map_err(|_| Error::Type {
398            expected: "number".to_string(),
399            found: parts[1].to_string(),
400            position: crate::Position::start(),
401            context: None,
402        })?;
403
404        // Store as a sequence for now
405        Ok(Value::Sequence(vec![Value::Float(x), Value::Float(y)]))
406    }
407
408    fn represent(&self, value: &Value) -> Result<String> {
409        if let Value::Sequence(seq) = value {
410            if seq.len() == 2 {
411                if let (Some(Value::Float(x)), Some(Value::Float(y))) = (seq.get(0), seq.get(1)) {
412                    return Ok(format!("{},{}", x, y));
413                }
414            }
415        }
416        Err(Error::Type {
417            expected: "point sequence".to_string(),
418            found: format!("{:?}", value),
419            position: crate::Position::start(),
420            context: None,
421        })
422    }
423}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `tag:yaml.org,2002:<name>` tag directly, so the construction
    /// tests do not depend on `TagResolver::resolve`.
    fn core_tag(name: &str, kind: TagKind) -> Tag {
        Tag {
            uri: format!("tag:yaml.org,2002:{}", name),
            original: format!("!!{}", name),
            kind,
        }
    }

    #[test]
    fn test_tag_resolution() {
        let mut resolver = TagResolver::new();

        // Test standard tags
        let tag = resolver.resolve("!!str").unwrap();
        assert_eq!(tag.uri, "tag:yaml.org,2002:str");
        assert_eq!(tag.kind, TagKind::Str);

        let tag = resolver.resolve("!!int").unwrap();
        assert_eq!(tag.uri, "tag:yaml.org,2002:int");
        assert_eq!(tag.kind, TagKind::Int);

        // Test primary handle
        resolver.add_directive("!".to_string(), "tag:example.com,2024:".to_string());
        let tag = resolver.resolve("!custom").unwrap();
        assert_eq!(tag.uri, "tag:example.com,2024:custom");

        // Test named handle
        resolver.add_directive("!e!".to_string(), "tag:example.com,2024:".to_string());
        let tag = resolver.resolve("!e!widget").unwrap();
        assert_eq!(tag.uri, "tag:example.com,2024:widget");

        // Test verbatim tag
        let tag = resolver.resolve("!<tag:explicit.com,2024:type>").unwrap();
        assert_eq!(tag.uri, "tag:explicit.com,2024:type");
    }

    #[test]
    fn test_tag_construction() {
        let resolver = TagResolver::new();

        // Test boolean construction
        let tag = core_tag("bool", TagKind::Bool);

        assert_eq!(resolver.apply_tag(&tag, "true").unwrap(), Value::Bool(true));
        assert_eq!(
            resolver.apply_tag(&tag, "false").unwrap(),
            Value::Bool(false)
        );
        assert_eq!(resolver.apply_tag(&tag, "yes").unwrap(), Value::Bool(true));
        assert_eq!(resolver.apply_tag(&tag, "no").unwrap(), Value::Bool(false));

        // Test integer construction
        let tag = core_tag("int", TagKind::Int);

        assert_eq!(resolver.apply_tag(&tag, "42").unwrap(), Value::Int(42));
        assert_eq!(resolver.apply_tag(&tag, "0x2A").unwrap(), Value::Int(42));
        assert_eq!(resolver.apply_tag(&tag, "0o52").unwrap(), Value::Int(42));
        assert_eq!(
            resolver.apply_tag(&tag, "0b101010").unwrap(),
            Value::Int(42)
        );
        assert_eq!(resolver.apply_tag(&tag, "1_234").unwrap(), Value::Int(1234));

        // Test float construction
        let tag = core_tag("float", TagKind::Float);

        // 1.5 is exactly representable in binary floating point, and unlike
        // `3.14` it does not trip clippy's deny-by-default `approx_constant`.
        assert_eq!(resolver.apply_tag(&tag, "1.5").unwrap(), Value::Float(1.5));
        assert_eq!(
            resolver.apply_tag(&tag, ".inf").unwrap(),
            Value::Float(f64::INFINITY)
        );
        assert_eq!(
            resolver.apply_tag(&tag, "-.inf").unwrap(),
            Value::Float(f64::NEG_INFINITY)
        );
        // NaN never compares equal to itself, so it is checked by predicate.
        assert!(matches!(resolver.apply_tag(&tag, ".nan").unwrap(), Value::Float(f) if f.is_nan()));
    }

    #[test]
    fn test_custom_tag_handler() {
        let mut resolver = TagResolver::new();

        // Register custom point handler
        resolver.register_handler(
            "tag:example.com,2024:point".to_string(),
            Box::new(PointTagHandler),
        );

        // Resolve and apply custom tag
        resolver.add_directive("!".to_string(), "tag:example.com,2024:".to_string());
        let tag = resolver.resolve("!point").unwrap();

        let value = resolver.apply_tag(&tag, "3.5, 7.2").unwrap();
        assert_eq!(
            value,
            Value::Sequence(vec![Value::Float(3.5), Value::Float(7.2)])
        );
    }
}