Skip to main content

oxirs_core/model/
literal.rs

1//! RDF Literal implementation
2//!
3//! This implementation is extracted and adapted from Oxigraph's oxrdf literal handling
4//! to provide zero-dependency RDF literal support with full XSD datatype validation.
5
6use crate::model::{NamedNode, NamedNodeRef, ObjectTerm, RdfTerm};
7use crate::vocab::{rdf, xsd};
8use crate::OxirsError;
9use lazy_static::lazy_static;
10use oxilangtag::LanguageTag as OxiLanguageTag;
11use oxsdatatypes::{Boolean, Date, DateTime, Decimal, Double, Float, Integer, Time};
12use regex::Regex;
13use std::borrow::Cow;
14use std::fmt::{self, Write};
15use std::hash::Hash;
16use std::str::FromStr;
17
/// Error reported when a string fails language tag validation.
///
/// It carries a single human-readable description of the failure.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LanguageTagParseError {
    /// Human-readable description of why the tag was rejected.
    message: String,
}
23
24impl fmt::Display for LanguageTagParseError {
25    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
26        write!(f, "Language tag parse error: {}", self.message)
27    }
28}
29
// Marker implementation: the empty body relies entirely on the `Error` trait's default methods.
impl std::error::Error for LanguageTagParseError {}
31
32impl From<LanguageTagParseError> for OxirsError {
33    fn from(err: LanguageTagParseError) -> Self {
34        OxirsError::Parse(err.message)
35    }
36}
37
/// An owned language tag as described by BCP 47.
///
/// Instances built through `LanguageTag::parse` have passed validation; the text is stored
/// exactly as it was supplied.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct LanguageTag {
    /// The tag text.
    tag: String,
}
43
44impl LanguageTag {
45    /// Parses a language tag from a string
46    pub fn parse(tag: impl Into<String>) -> Result<Self, LanguageTagParseError> {
47        let tag = tag.into();
48        validate_language_tag(&tag)?;
49        Ok(LanguageTag { tag })
50    }
51
52    /// Returns the language tag as a string slice
53    pub fn as_str(&self) -> &str {
54        &self.tag
55    }
56
57    /// Consumes the language tag and returns the inner string
58    pub fn into_inner(self) -> String {
59        self.tag
60    }
61}
62
63impl fmt::Display for LanguageTag {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        f.write_str(&self.tag)
66    }
67}
68
69lazy_static! {
70    /// BCP 47 language tag validation regex
71    /// Based on RFC 5646 - Tags for Identifying Languages
72    static ref LANGUAGE_TAG_REGEX: Regex = Regex::new(
73        r"^([a-zA-Z]{2,3}(-[a-zA-Z]{3}){0,3}(-[a-zA-Z]{4})?(-[a-zA-Z]{2}|\d{3})?(-[0-9a-zA-Z]{5,8}|-\d[0-9a-zA-Z]{3})*(-[0-9a-wyzA-WYZ](-[0-9a-zA-Z]{2,8})+)*(-x(-[0-9a-zA-Z]{1,8})+)?|x(-[0-9a-zA-Z]{1,8})+|[a-zA-Z]{4}|[a-zA-Z]{5,8})$"
74    ).expect("Language tag regex compilation failed");
75
76    /// Simple language subtag validation (2-3 letter language codes)
77    static ref SIMPLE_LANGUAGE_REGEX: Regex = Regex::new(
78        r"^[a-zA-Z]{2,3}$"
79    ).expect("Simple language regex compilation failed");
80
81    /// XSD numeric type validation regexes
82    static ref INTEGER_REGEX: Regex = Regex::new(
83        r"^[+-]?\d+$"
84    ).expect("Integer regex compilation failed");
85
86    static ref DECIMAL_REGEX: Regex = Regex::new(
87        r"^[+-]?(\d+(\.\d*)?|\.\d+)$"
88    ).expect("Decimal regex compilation failed");
89
90    static ref DOUBLE_REGEX: Regex = Regex::new(
91        r"^[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$|^[+-]?INF$|^NaN$"
92    ).expect("Double regex compilation failed");
93
94    static ref BOOLEAN_REGEX: Regex = Regex::new(
95        r"^(true|false|1|0)$"
96    ).expect("Boolean regex compilation failed");
97
98    /// DateTime validation (simplified ISO 8601)
99    static ref DATETIME_REGEX: Regex = Regex::new(
100        r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?$"
101    ).expect("DateTime regex compilation failed");
102
103    static ref DATE_REGEX: Regex = Regex::new(
104        r"^\d{4}-\d{2}-\d{2}(Z|[+-]\d{2}:\d{2})?$"
105    ).expect("Date regex compilation failed");
106
107    static ref TIME_REGEX: Regex = Regex::new(
108        r"^\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?$"
109    ).expect("Time regex compilation failed");
110}
111
112/// Validates a language tag according to BCP 47 (RFC 5646) using oxilangtag
113fn validate_language_tag(tag: &str) -> Result<(), LanguageTagParseError> {
114    OxiLanguageTag::parse(tag)
115        .map(|_| ())
116        .map_err(|e| LanguageTagParseError {
117            message: format!("Invalid language tag '{tag}': {e}"),
118        })
119}
120
121/// Validates a literal value against its XSD datatype
122pub fn validate_xsd_value(value: &str, datatype_iri: &str) -> Result<(), OxirsError> {
123    match datatype_iri {
124        // String types
125        "http://www.w3.org/2001/XMLSchema#string"
126        | "http://www.w3.org/2001/XMLSchema#normalizedString"
127        | "http://www.w3.org/2001/XMLSchema#token" => {
128            // All strings are valid for string types
129            Ok(())
130        }
131
132        // Boolean type - use oxsdatatypes Boolean parsing
133        "http://www.w3.org/2001/XMLSchema#boolean" => Boolean::from_str(value)
134            .map(|_| ())
135            .map_err(|e| OxirsError::Parse(format!("Invalid boolean value '{value}': {e}"))),
136
137        // Integer types - use oxsdatatypes Integer parsing with range validation
138        "http://www.w3.org/2001/XMLSchema#integer"
139        | "http://www.w3.org/2001/XMLSchema#long"
140        | "http://www.w3.org/2001/XMLSchema#int"
141        | "http://www.w3.org/2001/XMLSchema#short"
142        | "http://www.w3.org/2001/XMLSchema#byte"
143        | "http://www.w3.org/2001/XMLSchema#unsignedLong"
144        | "http://www.w3.org/2001/XMLSchema#unsignedInt"
145        | "http://www.w3.org/2001/XMLSchema#unsignedShort"
146        | "http://www.w3.org/2001/XMLSchema#unsignedByte"
147        | "http://www.w3.org/2001/XMLSchema#positiveInteger"
148        | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"
149        | "http://www.w3.org/2001/XMLSchema#negativeInteger"
150        | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => Integer::from_str(value)
151            .map_err(|e| OxirsError::Parse(format!("Invalid integer value '{value}': {e}")))
152            .and_then(|integer| validate_integer_range_oxs(integer, datatype_iri)),
153
154        // Decimal type - use oxsdatatypes Decimal parsing
155        "http://www.w3.org/2001/XMLSchema#decimal" => Decimal::from_str(value)
156            .map(|_| ())
157            .map_err(|e| OxirsError::Parse(format!("Invalid decimal value '{value}': {e}"))),
158
159        // Floating point types - use oxsdatatypes Float/Double parsing
160        "http://www.w3.org/2001/XMLSchema#float" => Float::from_str(value)
161            .map(|_| ())
162            .map_err(|e| OxirsError::Parse(format!("Invalid float value '{value}': {e}"))),
163        "http://www.w3.org/2001/XMLSchema#double" => Double::from_str(value)
164            .map(|_| ())
165            .map_err(|e| OxirsError::Parse(format!("Invalid double value '{value}': {e}"))),
166
167        // Date/time types - use oxsdatatypes parsing
168        "http://www.w3.org/2001/XMLSchema#dateTime" => DateTime::from_str(value)
169            .map(|_| ())
170            .map_err(|e| OxirsError::Parse(format!("Invalid dateTime value '{value}': {e}"))),
171
172        "http://www.w3.org/2001/XMLSchema#date" => Date::from_str(value)
173            .map(|_| ())
174            .map_err(|e| OxirsError::Parse(format!("Invalid date value '{value}': {e}"))),
175
176        "http://www.w3.org/2001/XMLSchema#time" => Time::from_str(value)
177            .map(|_| ())
178            .map_err(|e| OxirsError::Parse(format!("Invalid time value '{value}': {e}"))),
179
180        // For unknown datatypes, don't validate
181        _ => Ok(()),
182    }
183}
184
185/// Validates integer values against their specific type ranges
186#[allow(dead_code)]
187fn validate_integer_range(value: &str, datatype_iri: &str) -> Result<(), OxirsError> {
188    let parsed_value: i64 = value
189        .parse()
190        .map_err(|_| OxirsError::Parse(format!("Cannot parse integer: '{value}'")))?;
191
192    match datatype_iri {
193        "http://www.w3.org/2001/XMLSchema#byte" => {
194            if !(-128..=127).contains(&parsed_value) {
195                return Err(OxirsError::Parse(format!(
196                    "Byte value out of range: {parsed_value}. Must be between -128 and 127"
197                )));
198            }
199        }
200        "http://www.w3.org/2001/XMLSchema#short" => {
201            if !(-32768..=32767).contains(&parsed_value) {
202                return Err(OxirsError::Parse(format!(
203                    "Short value out of range: {parsed_value}. Must be between -32768 and 32767"
204                )));
205            }
206        }
207        "http://www.w3.org/2001/XMLSchema#int" => {
208            if !(-2147483648..=2147483647).contains(&parsed_value) {
209                return Err(OxirsError::Parse(format!(
210                    "Int value out of range: {parsed_value}. Must be between -2147483648 and 2147483647"
211                )));
212            }
213        }
214        "http://www.w3.org/2001/XMLSchema#unsignedByte" => {
215            if !(0..=255).contains(&parsed_value) {
216                return Err(OxirsError::Parse(format!(
217                    "Unsigned byte value out of range: {parsed_value}. Must be between 0 and 255"
218                )));
219            }
220        }
221        "http://www.w3.org/2001/XMLSchema#unsignedShort" => {
222            if !(0..=65535).contains(&parsed_value) {
223                return Err(OxirsError::Parse(format!(
224                    "Unsigned short value out of range: {parsed_value}. Must be between 0 and 65535"
225                )));
226            }
227        }
228        "http://www.w3.org/2001/XMLSchema#unsignedInt" => {
229            if !(0..=4294967295).contains(&parsed_value) {
230                return Err(OxirsError::Parse(format!(
231                    "Unsigned int value out of range: {parsed_value}. Must be between 0 and 4294967295"
232                )));
233            }
234        }
235        "http://www.w3.org/2001/XMLSchema#positiveInteger" => {
236            if parsed_value <= 0 {
237                return Err(OxirsError::Parse(format!(
238                    "Positive integer must be greater than 0, got: {parsed_value}"
239                )));
240            }
241        }
242        "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
243            if parsed_value < 0 {
244                return Err(OxirsError::Parse(format!(
245                    "Non-negative integer must be >= 0, got: {parsed_value}"
246                )));
247            }
248        }
249        "http://www.w3.org/2001/XMLSchema#negativeInteger" => {
250            if parsed_value >= 0 {
251                return Err(OxirsError::Parse(format!(
252                    "Negative integer must be less than 0, got: {parsed_value}"
253                )));
254            }
255        }
256        "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => {
257            if parsed_value > 0 {
258                return Err(OxirsError::Parse(format!(
259                    "Non-positive integer must be <= 0, got: {parsed_value}"
260                )));
261            }
262        }
263        _ => {} // Other integer types don't have additional range restrictions in this simplified implementation
264    }
265
266    Ok(())
267}
268
269/// Validates integer values against their specific type ranges using oxsdatatypes Integer
270fn validate_integer_range_oxs(integer: Integer, datatype_iri: &str) -> Result<(), OxirsError> {
271    // Convert oxsdatatypes Integer to i64 for range checking
272    let parsed_value: i64 = integer.to_string().parse().map_err(|_| {
273        OxirsError::Parse("Cannot convert integer to i64 for range validation".to_string())
274    })?;
275
276    match datatype_iri {
277        "http://www.w3.org/2001/XMLSchema#byte" => {
278            if !(-128..=127).contains(&parsed_value) {
279                return Err(OxirsError::Parse(format!(
280                    "Byte value out of range: {parsed_value}. Must be between -128 and 127"
281                )));
282            }
283        }
284        "http://www.w3.org/2001/XMLSchema#short" => {
285            if !(-32768..=32767).contains(&parsed_value) {
286                return Err(OxirsError::Parse(format!(
287                    "Short value out of range: {parsed_value}. Must be between -32768 and 32767"
288                )));
289            }
290        }
291        "http://www.w3.org/2001/XMLSchema#int" => {
292            if !(-2147483648..=2147483647).contains(&parsed_value) {
293                return Err(OxirsError::Parse(format!(
294                    "Int value out of range: {parsed_value}. Must be between -2147483648 and 2147483647"
295                )));
296            }
297        }
298        "http://www.w3.org/2001/XMLSchema#unsignedByte" => {
299            if !(0..=255).contains(&parsed_value) {
300                return Err(OxirsError::Parse(format!(
301                    "Unsigned byte value out of range: {parsed_value}. Must be between 0 and 255"
302                )));
303            }
304        }
305        "http://www.w3.org/2001/XMLSchema#unsignedShort" => {
306            if !(0..=65535).contains(&parsed_value) {
307                return Err(OxirsError::Parse(format!(
308                    "Unsigned short value out of range: {parsed_value}. Must be between 0 and 65535"
309                )));
310            }
311        }
312        "http://www.w3.org/2001/XMLSchema#unsignedInt" => {
313            if !(0..=4294967295).contains(&parsed_value) {
314                return Err(OxirsError::Parse(format!(
315                    "Unsigned int value out of range: {parsed_value}. Must be between 0 and 4294967295"
316                )));
317            }
318        }
319        "http://www.w3.org/2001/XMLSchema#positiveInteger" => {
320            if parsed_value <= 0 {
321                return Err(OxirsError::Parse(format!(
322                    "Positive integer must be greater than 0, got: {parsed_value}"
323                )));
324            }
325        }
326        "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
327            if parsed_value < 0 {
328                return Err(OxirsError::Parse(format!(
329                    "Non-negative integer must be >= 0, got: {parsed_value}"
330                )));
331            }
332        }
333        "http://www.w3.org/2001/XMLSchema#negativeInteger" => {
334            if parsed_value >= 0 {
335                return Err(OxirsError::Parse(format!(
336                    "Negative integer must be less than 0, got: {parsed_value}"
337                )));
338            }
339        }
340        "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => {
341            if parsed_value > 0 {
342                return Err(OxirsError::Parse(format!(
343                    "Non-positive integer must be <= 0, got: {parsed_value}"
344                )));
345            }
346        }
347        _ => {} // Other integer types don't have additional range restrictions
348    }
349
350    Ok(())
351}
352
353/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
354///
355/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
356/// ```
357/// use oxirs_core::model::literal::Literal;
358/// use oxirs_core::vocab::xsd;
359///
360/// assert_eq!(
361///     "\"foo\\nbar\"",
362///     Literal::new_simple_literal("foo\nbar").to_string()
363/// );
364///
365/// assert_eq!(
366///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
367///     Literal::new_typed_literal("1999-01-01", xsd::DATE.clone()).to_string()
368/// );
369///
370/// assert_eq!(
371///     r#""foo"@en"#,
372///     Literal::new_language_tagged_literal("foo", "en").unwrap().to_string()
373/// );
374/// ```
375#[derive(Eq, PartialEq, Debug, Clone, Hash, PartialOrd, Ord)]
376#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
377pub struct Literal(LiteralContent);
378
379#[derive(PartialEq, Eq, Debug, Clone, Hash, PartialOrd, Ord)]
380#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
381enum LiteralContent {
382    String(String),
383    LanguageTaggedString {
384        value: String,
385        language: String,
386    },
387    #[cfg(feature = "rdf-12")]
388    DirectionalLanguageTaggedString {
389        value: String,
390        language: String,
391        direction: BaseDirection,
392    },
393    TypedLiteral {
394        value: String,
395        datatype: NamedNode,
396    },
397}
398
399impl Literal {
400    /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
401    #[inline]
402    pub fn new_simple_literal(value: impl Into<String>) -> Self {
403        Self(LiteralContent::String(value.into()))
404    }
405
406    /// Creates a new string literal without language or datatype (alias for compatibility)
407    #[inline]
408    pub fn new(value: impl Into<String>) -> Self {
409        Self::new_simple_literal(value)
410    }
411
412    /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
413    #[inline]
414    pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self {
415        let value = value.into();
416        let datatype = datatype.into();
417        Self(if datatype == *xsd::STRING {
418            LiteralContent::String(value)
419        } else {
420            LiteralContent::TypedLiteral { value, datatype }
421        })
422    }
423
424    /// Creates a new literal with a datatype (alias for compatibility)
425    #[inline]
426    pub fn new_typed(value: impl Into<String>, datatype: NamedNode) -> Self {
427        Self::new_typed_literal(value, datatype)
428    }
429
430    /// Creates a new literal with a datatype and validates the value
431    pub fn new_typed_validated(
432        value: impl Into<String>,
433        datatype: NamedNode,
434    ) -> Result<Self, OxirsError> {
435        let value = value.into();
436        validate_xsd_value(&value, datatype.as_str())?;
437        Ok(Literal::new_typed_literal(value, datatype))
438    }
439
440    /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
441    #[inline]
442    pub fn new_language_tagged_literal(
443        value: impl Into<String>,
444        language: impl Into<String>,
445    ) -> Result<Self, LanguageTagParseError> {
446        let language = language.into().to_ascii_lowercase();
447        // Normalize to lowercase per RDF 1.1 spec (language tags are case-insensitive,
448        // stored as lowercase for consistent comparison and lookup).
449        validate_language_tag(&language)?;
450        Ok(Self::new_language_tagged_literal_unchecked(value, language))
451    }
452
453    /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
454    ///
455    /// It is the responsibility of the caller to check that `language`
456    /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
457    /// and is lowercase.
458    ///
459    /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
460    #[inline]
461    pub fn new_language_tagged_literal_unchecked(
462        value: impl Into<String>,
463        language: impl Into<String>,
464    ) -> Self {
465        Self(LiteralContent::LanguageTaggedString {
466            value: value.into(),
467            language: language.into(),
468        })
469    }
470
471    /// Creates a new literal with a language tag (alias for compatibility)
472    pub fn new_lang(
473        value: impl Into<String>,
474        language: impl Into<String>,
475    ) -> Result<Self, OxirsError> {
476        let result = Self::new_language_tagged_literal(value, language)?;
477        Ok(result)
478    }
479
480    /// Builds an RDF [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-dir-lang-string).
481    #[cfg(feature = "rdf-12")]
482    #[inline]
483    pub fn new_directional_language_tagged_literal(
484        value: impl Into<String>,
485        language: impl Into<String>,
486        direction: impl Into<BaseDirection>,
487    ) -> Result<Self, LanguageTagParseError> {
488        let mut language = language.into();
489        language.make_ascii_lowercase();
490        validate_language_tag(&language)?;
491        Ok(Self::new_directional_language_tagged_literal_unchecked(
492            value, language, direction,
493        ))
494    }
495
496    /// Builds an RDF [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-dir-lang-string).
497    ///
498    /// It is the responsibility of the caller to check that `language`
499    /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
500    /// and is lowercase.
501    ///
502    /// [`Literal::new_directional_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
503    #[cfg(feature = "rdf-12")]
504    #[inline]
505    pub fn new_directional_language_tagged_literal_unchecked(
506        value: impl Into<String>,
507        language: impl Into<String>,
508        direction: impl Into<BaseDirection>,
509    ) -> Self {
510        Self(LiteralContent::DirectionalLanguageTaggedString {
511            value: value.into(),
512            language: language.into(),
513            direction: direction.into(),
514        })
515    }
516
517    /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form).
518    #[inline]
519    pub fn value(&self) -> &str {
520        self.as_ref().value()
521    }
522
523    /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
524    ///
525    /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
526    /// They are normalized to lowercase by this implementation.
527    #[inline]
528    pub fn language(&self) -> Option<&str> {
529        self.as_ref().language()
530    }
531
532    /// The literal [base direction](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction) if it is a [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction).
533    ///
534    /// The two possible base directions are left-to-right (`ltr`) and right-to-left (`rtl`).
535    #[cfg(feature = "rdf-12")]
536    #[inline]
537    pub fn direction(&self) -> Option<BaseDirection> {
538        self.as_ref().direction()
539    }
540
541    /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
542    ///
543    /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
544    /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
545    #[inline]
546    pub fn datatype(&self) -> NamedNodeRef<'_> {
547        self.as_ref().datatype()
548    }
549
550    /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
551    ///
552    /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
553    /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
554    #[inline]
555    #[deprecated(note = "Plain literal concept is removed in RDF 1.1", since = "0.3.0")]
556    pub fn is_plain(&self) -> bool {
557        #[allow(deprecated)]
558        self.as_ref().is_plain()
559    }
560
561    /// Returns true if this literal has a language tag
562    pub fn is_lang_string(&self) -> bool {
563        self.language().is_some()
564    }
565
566    /// Returns true if this literal has a datatype (excluding xsd:string which is implicit)
567    pub fn is_typed(&self) -> bool {
568        matches!(&self.0, LiteralContent::TypedLiteral { .. })
569    }
570
571    #[inline]
572    pub fn as_ref(&self) -> LiteralRef<'_> {
573        LiteralRef(match &self.0 {
574            LiteralContent::String(value) => LiteralRefContent::String(value),
575            LiteralContent::LanguageTaggedString { value, language } => {
576                LiteralRefContent::LanguageTaggedString { value, language }
577            }
578            #[cfg(feature = "rdf-12")]
579            LiteralContent::DirectionalLanguageTaggedString {
580                value,
581                language,
582                direction,
583            } => LiteralRefContent::DirectionalLanguageTaggedString {
584                value,
585                language,
586                direction: *direction,
587            },
588            LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral {
589                value,
590                datatype: NamedNodeRef::new_unchecked(datatype.as_str()),
591            },
592        })
593    }
594
595    /// Extract components from this literal (value, datatype, language tag).
596    #[inline]
597    pub fn destruct(self) -> (String, Option<NamedNode>, Option<String>) {
598        match self.0 {
599            LiteralContent::String(s) => (s, None, None),
600            LiteralContent::LanguageTaggedString { value, language } => {
601                (value, None, Some(language))
602            }
603            #[cfg(feature = "rdf-12")]
604            LiteralContent::DirectionalLanguageTaggedString {
605                value,
606                language,
607                direction: _,
608            } => (value, None, Some(language)),
609            LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None),
610        }
611    }
612
613    /// Attempts to extract the value as a boolean
614    ///
615    /// Works for XSD boolean literals and other representations like "true"/"false"
616    pub fn as_bool(&self) -> Option<bool> {
617        match self.value().to_lowercase().as_str() {
618            "true" | "1" => Some(true),
619            "false" | "0" => Some(false),
620            _ => None,
621        }
622    }
623
624    /// Attempts to extract the value as an integer
625    ///
626    /// Works for XSD integer literals and other numeric representations
627    pub fn as_i64(&self) -> Option<i64> {
628        self.value().parse().ok()
629    }
630
631    /// Attempts to extract the value as a 32-bit integer
632    pub fn as_i32(&self) -> Option<i32> {
633        self.value().parse().ok()
634    }
635
636    /// Attempts to extract the value as a floating point number
637    ///
638    /// Works for XSD decimal, double, float literals
639    pub fn as_f64(&self) -> Option<f64> {
640        self.value().parse().ok()
641    }
642
643    /// Attempts to extract the value as a 32-bit floating point number
644    pub fn as_f32(&self) -> Option<f32> {
645        self.value().parse().ok()
646    }
647
648    /// Returns true if this literal represents a numeric value
649    pub fn is_numeric(&self) -> bool {
650        match &self.0 {
651            LiteralContent::TypedLiteral { datatype, .. } => {
652                let dt_iri = datatype.as_str();
653                matches!(
654                    dt_iri,
655                    "http://www.w3.org/2001/XMLSchema#integer"
656                        | "http://www.w3.org/2001/XMLSchema#decimal"
657                        | "http://www.w3.org/2001/XMLSchema#double"
658                        | "http://www.w3.org/2001/XMLSchema#float"
659                        | "http://www.w3.org/2001/XMLSchema#long"
660                        | "http://www.w3.org/2001/XMLSchema#int"
661                        | "http://www.w3.org/2001/XMLSchema#short"
662                        | "http://www.w3.org/2001/XMLSchema#byte"
663                        | "http://www.w3.org/2001/XMLSchema#unsignedLong"
664                        | "http://www.w3.org/2001/XMLSchema#unsignedInt"
665                        | "http://www.w3.org/2001/XMLSchema#unsignedShort"
666                        | "http://www.w3.org/2001/XMLSchema#unsignedByte"
667                        | "http://www.w3.org/2001/XMLSchema#positiveInteger"
668                        | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger"
669                        | "http://www.w3.org/2001/XMLSchema#negativeInteger"
670                        | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
671                )
672            }
673            _ => {
674                // Check if the value looks numeric
675                self.as_f64().is_some()
676            }
677        }
678    }
679
680    /// Returns true if this literal represents a boolean value
681    pub fn is_boolean(&self) -> bool {
682        match &self.0 {
683            LiteralContent::TypedLiteral { datatype, .. } => {
684                datatype.as_str() == "http://www.w3.org/2001/XMLSchema#boolean"
685            }
686            _ => self.as_bool().is_some(),
687        }
688    }
689
690    /// Returns the canonical form of this literal
691    ///
692    /// This normalizes the literal according to XSD rules and recommendations
693    pub fn canonical_form(&self) -> Literal {
694        match &self.0 {
695            LiteralContent::TypedLiteral { value, datatype } => {
696                let dt_iri = datatype.as_str();
697                match dt_iri {
698                    "http://www.w3.org/2001/XMLSchema#boolean" => {
699                        if let Some(bool_val) = self.as_bool() {
700                            let canonical_value = if bool_val { "true" } else { "false" };
701                            return Literal::new_typed(canonical_value, datatype.clone());
702                        }
703                    }
704                    "http://www.w3.org/2001/XMLSchema#integer"
705                    | "http://www.w3.org/2001/XMLSchema#long"
706                    | "http://www.w3.org/2001/XMLSchema#int"
707                    | "http://www.w3.org/2001/XMLSchema#short"
708                    | "http://www.w3.org/2001/XMLSchema#byte" => {
709                        if let Some(int_val) = self.as_i64() {
710                            return Literal::new_typed(int_val.to_string(), datatype.clone());
711                        }
712                    }
713                    "http://www.w3.org/2001/XMLSchema#unsignedLong"
714                    | "http://www.w3.org/2001/XMLSchema#unsignedInt"
715                    | "http://www.w3.org/2001/XMLSchema#unsignedShort"
716                    | "http://www.w3.org/2001/XMLSchema#unsignedByte"
717                    | "http://www.w3.org/2001/XMLSchema#positiveInteger"
718                    | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
719                        if let Some(int_val) = self.as_i64() {
720                            if int_val >= 0 {
721                                return Literal::new_typed(int_val.to_string(), datatype.clone());
722                            }
723                        }
724                    }
725                    "http://www.w3.org/2001/XMLSchema#negativeInteger"
726                    | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger" => {
727                        if let Some(int_val) = self.as_i64() {
728                            if int_val <= 0 {
729                                return Literal::new_typed(int_val.to_string(), datatype.clone());
730                            }
731                        }
732                    }
733                    "http://www.w3.org/2001/XMLSchema#decimal" => {
734                        if let Some(dec_val) = self.as_f64() {
735                            // Format decimal properly - remove trailing zeros after decimal point
736                            let formatted = format!("{dec_val}");
737                            if formatted.contains('.') {
738                                let trimmed = formatted.trim_end_matches('0').trim_end_matches('.');
739                                return Literal::new_typed(
740                                    if trimmed.is_empty() || trimmed == "-" {
741                                        "0"
742                                    } else {
743                                        trimmed
744                                    },
745                                    datatype.clone(),
746                                );
747                            } else {
748                                return Literal::new_typed(
749                                    format!("{formatted}.0"),
750                                    datatype.clone(),
751                                );
752                            }
753                        }
754                    }
755                    "http://www.w3.org/2001/XMLSchema#double"
756                    | "http://www.w3.org/2001/XMLSchema#float" => {
757                        if let Some(float_val) = self.as_f64() {
758                            // Handle special values
759                            if float_val.is_infinite() {
760                                return Literal::new_typed(
761                                    if float_val.is_sign_positive() {
762                                        "INF"
763                                    } else {
764                                        "-INF"
765                                    },
766                                    datatype.clone(),
767                                );
768                            } else if float_val.is_nan() {
769                                return Literal::new_typed("NaN", datatype.clone());
770                            } else {
771                                // Use scientific notation for very large or very small numbers
772                                let formatted = if float_val.abs() >= 1e6
773                                    || (float_val.abs() < 1e-3 && float_val != 0.0)
774                                {
775                                    format!("{float_val:E}")
776                                } else {
777                                    format!("{float_val}")
778                                };
779                                return Literal::new_typed(formatted, datatype.clone());
780                            }
781                        }
782                    }
783                    "http://www.w3.org/2001/XMLSchema#string"
784                    | "http://www.w3.org/2001/XMLSchema#normalizedString" => {
785                        // Normalize whitespace for normalizedString
786                        if dt_iri == "http://www.w3.org/2001/XMLSchema#normalizedString" {
787                            let normalized = value.replace(['\t', '\n', '\r'], " ");
788                            return Literal::new_typed(normalized, datatype.clone());
789                        }
790                    }
791                    "http://www.w3.org/2001/XMLSchema#token" => {
792                        // Normalize whitespace and collapse consecutive spaces
793                        let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
794                        return Literal::new_typed(normalized, datatype.clone());
795                    }
796                    _ => {}
797                }
798            }
799            LiteralContent::LanguageTaggedString { value, language } => {
800                // Keep original case for language tags to match RFC 5646 best practices
801                return Self(LiteralContent::LanguageTaggedString {
802                    value: value.clone(),
803                    language: language.clone(),
804                });
805            }
806            _ => {}
807        }
808        self.clone()
809    }
810
811    /// Validates this literal against its datatype (if any)
812    pub fn validate(&self) -> Result<(), OxirsError> {
813        match &self.0 {
814            LiteralContent::String(_) => Ok(()),
815            LiteralContent::LanguageTaggedString { language, .. } => {
816                validate_language_tag(language).map_err(Into::into)
817            }
818            #[cfg(feature = "rdf-12")]
819            LiteralContent::DirectionalLanguageTaggedString { language, .. } => {
820                validate_language_tag(language).map_err(Into::into)
821            }
822            LiteralContent::TypedLiteral { value, datatype } => {
823                validate_xsd_value(value, datatype.as_str())
824            }
825        }
826    }
827}
828
829impl fmt::Display for Literal {
830    #[inline]
831    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
832        self.as_ref().fmt(f)
833    }
834}
835
836impl RdfTerm for Literal {
837    fn as_str(&self) -> &str {
838        self.value()
839    }
840
841    fn is_literal(&self) -> bool {
842        true
843    }
844}
845
846impl ObjectTerm for Literal {}
847
848/// A borrowed RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
849///
850/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
851/// ```
852/// use oxirs_core::model::literal::LiteralRef;
853/// use oxirs_core::vocab::xsd;
854///
855/// assert_eq!(
856///     "\"foo\\nbar\"",
857///     LiteralRef::new_simple_literal("foo\nbar").to_string()
858/// );
859///
860/// assert_eq!(
861///     r#""1999-01-01"^^<http://www.w3.org/2001/XMLSchema#date>"#,
862///     LiteralRef::new_typed_literal("1999-01-01", xsd::DATE.as_ref()).to_string()
863/// );
864/// ```
865#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
866pub struct LiteralRef<'a>(LiteralRefContent<'a>);
867
868#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
869enum LiteralRefContent<'a> {
870    String(&'a str),
871    LanguageTaggedString {
872        value: &'a str,
873        language: &'a str,
874    },
875    #[cfg(feature = "rdf-12")]
876    DirectionalLanguageTaggedString {
877        value: &'a str,
878        language: &'a str,
879        direction: BaseDirection,
880    },
881    TypedLiteral {
882        value: &'a str,
883        datatype: NamedNodeRef<'a>,
884    },
885}
886
887impl<'a> LiteralRef<'a> {
888    /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
889    #[inline]
890    pub const fn new_simple_literal(value: &'a str) -> Self {
891        LiteralRef(LiteralRefContent::String(value))
892    }
893
894    /// Creates a new literal reference (alias for compatibility)
895    #[inline]
896    pub const fn new(value: &'a str) -> Self {
897        Self::new_simple_literal(value)
898    }
899
900    /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
901    #[inline]
902    pub fn new_typed_literal(value: &'a str, datatype: impl Into<NamedNodeRef<'a>>) -> Self {
903        let datatype = datatype.into();
904        LiteralRef(if datatype == xsd::STRING.as_ref() {
905            LiteralRefContent::String(value)
906        } else {
907            LiteralRefContent::TypedLiteral { value, datatype }
908        })
909    }
910
911    /// Creates a new typed literal reference (alias for compatibility)
912    #[inline]
913    pub fn new_typed(value: &'a str, datatype: NamedNodeRef<'a>) -> Self {
914        Self::new_typed_literal(value, datatype)
915    }
916
917    /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
918    ///
919    /// It is the responsibility of the caller to check that `language`
920    /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
921    /// and is lowercase.
922    ///
923    /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
924    #[inline]
925    pub const fn new_language_tagged_literal_unchecked(value: &'a str, language: &'a str) -> Self {
926        LiteralRef(LiteralRefContent::LanguageTaggedString { value, language })
927    }
928
929    /// Creates a new language-tagged literal reference (alias for compatibility)
930    #[inline]
931    pub const fn new_lang(value: &'a str, language: &'a str) -> Self {
932        Self::new_language_tagged_literal_unchecked(value, language)
933    }
934
935    /// Builds an RDF [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-dir-lang-string).
936    ///
937    /// It is the responsibility of the caller to check that `language`
938    /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag,
939    /// and is lowercase.
940    ///
941    /// [`Literal::new_directional_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data.
942    #[cfg(feature = "rdf-12")]
943    #[inline]
944    pub const fn new_directional_language_tagged_literal_unchecked(
945        value: &'a str,
946        language: &'a str,
947        direction: BaseDirection,
948    ) -> Self {
949        LiteralRef(LiteralRefContent::DirectionalLanguageTaggedString {
950            value,
951            language,
952            direction,
953        })
954    }
955
956    /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
957    #[inline]
958    pub const fn value(self) -> &'a str {
959        match self.0 {
960            LiteralRefContent::String(value)
961            | LiteralRefContent::LanguageTaggedString { value, .. }
962            | LiteralRefContent::TypedLiteral { value, .. } => value,
963            #[cfg(feature = "rdf-12")]
964            LiteralRefContent::DirectionalLanguageTaggedString { value, .. } => value,
965        }
966    }
967
968    /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
969    ///
970    /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
971    /// They are normalized to lowercase by this implementation.
972    #[inline]
973    pub const fn language(self) -> Option<&'a str> {
974        match self.0 {
975            LiteralRefContent::LanguageTaggedString { language, .. } => Some(language),
976            #[cfg(feature = "rdf-12")]
977            LiteralRefContent::DirectionalLanguageTaggedString { language, .. } => Some(language),
978            _ => None,
979        }
980    }
981
982    /// The literal [base direction](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction) if it is a [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction).
983    ///
984    /// The two possible base directions are left-to-right (`ltr`) and right-to-left (`rtl`).
985    #[cfg(feature = "rdf-12")]
986    #[inline]
987    pub const fn direction(self) -> Option<BaseDirection> {
988        match self.0 {
989            LiteralRefContent::DirectionalLanguageTaggedString { direction, .. } => Some(direction),
990            _ => None,
991        }
992    }
993
994    /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
995    ///
996    /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
997    /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
998    #[inline]
999    pub fn datatype(self) -> NamedNodeRef<'a> {
1000        match self.0 {
1001            LiteralRefContent::String(_) => xsd::STRING.as_ref(),
1002            LiteralRefContent::LanguageTaggedString { .. } => rdf::LANG_STRING.as_ref(),
1003            #[cfg(feature = "rdf-12")]
1004            LiteralRefContent::DirectionalLanguageTaggedString { .. } => {
1005                rdf::DIR_LANG_STRING.as_ref()
1006            }
1007            LiteralRefContent::TypedLiteral { datatype, .. } => datatype,
1008        }
1009    }
1010
1011    /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal).
1012    ///
1013    /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
1014    /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string).
1015    #[inline]
1016    #[deprecated(note = "Plain literal concept is removed in RDF 1.1", since = "0.3.0")]
1017    pub const fn is_plain(self) -> bool {
1018        matches!(
1019            self.0,
1020            LiteralRefContent::String(_) | LiteralRefContent::LanguageTaggedString { .. }
1021        )
1022    }
1023
1024    #[inline]
1025    pub fn into_owned(self) -> Literal {
1026        Literal(match self.0 {
1027            LiteralRefContent::String(value) => LiteralContent::String(value.to_owned()),
1028            LiteralRefContent::LanguageTaggedString { value, language } => {
1029                LiteralContent::LanguageTaggedString {
1030                    value: value.to_owned(),
1031                    language: language.to_owned(),
1032                }
1033            }
1034            #[cfg(feature = "rdf-12")]
1035            LiteralRefContent::DirectionalLanguageTaggedString {
1036                value,
1037                language,
1038                direction,
1039            } => LiteralContent::DirectionalLanguageTaggedString {
1040                value: value.to_owned(),
1041                language: language.to_owned(),
1042                direction,
1043            },
1044            LiteralRefContent::TypedLiteral { value, datatype } => LiteralContent::TypedLiteral {
1045                value: value.to_owned(),
1046                datatype: datatype.into_owned(),
1047            },
1048        })
1049    }
1050
1051    /// Converts to an owned Literal (alias for compatibility)
1052    #[inline]
1053    pub fn to_owned(&self) -> Literal {
1054        self.into_owned()
1055    }
1056}
1057
1058impl fmt::Display for LiteralRef<'_> {
1059    #[inline]
1060    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1061        match self.0 {
1062            LiteralRefContent::String(value) => print_quoted_str(value, f),
1063            LiteralRefContent::LanguageTaggedString { value, language } => {
1064                print_quoted_str(value, f)?;
1065                write!(f, "@{language}")
1066            }
1067            #[cfg(feature = "rdf-12")]
1068            LiteralRefContent::DirectionalLanguageTaggedString {
1069                value,
1070                language,
1071                direction,
1072            } => {
1073                print_quoted_str(value, f)?;
1074                write!(f, "@{language}--{direction}")
1075            }
1076            LiteralRefContent::TypedLiteral { value, datatype } => {
1077                print_quoted_str(value, f)?;
1078                write!(f, "^^{datatype}")
1079            }
1080        }
1081    }
1082}
1083
1084impl<'a> RdfTerm for LiteralRef<'a> {
1085    fn as_str(&self) -> &str {
1086        self.value()
1087    }
1088
1089    fn is_literal(&self) -> bool {
1090        true
1091    }
1092}
1093
/// Writes `string` to `f` surrounded by double quotes, with escaping.
///
/// Escapes applied:
/// * backspace, tab, line feed, form feed and carriage return use their
///   short forms (`\b`, `\t`, `\n`, `\f`, `\r`);
/// * `"` and `\` are backslash-escaped;
/// * every other character in `U+0000..=U+001F`, plus `U+007F`, is written as
///   `\uXXXX` with four uppercase hexadecimal digits;
/// * all remaining characters are written unchanged.
///
/// # Errors
///
/// Propagates any error returned by the underlying writer.
#[inline]
pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result {
    f.write_char('"')?;
    for c in string.chars() {
        match c {
            // The short escapes must come before the generic control-character arm below.
            '\u{08}' => f.write_str("\\b"),
            '\t' => f.write_str("\\t"),
            '\n' => f.write_str("\\n"),
            '\u{0C}' => f.write_str("\\f"),
            '\r' => f.write_str("\\r"),
            '"' => f.write_str("\\\""),
            '\\' => f.write_str("\\\\"),
            '\0'..='\u{1F}' | '\u{7F}' => write!(f, "\\u{:04X}", u32::from(c)),
            _ => f.write_char(c),
        }?;
    }
    f.write_char('"')
}
1113
/// A [directional language-tagged string](https://www.w3.org/TR/rdf12-concepts/#dfn-dir-lang-string) [base-direction](https://www.w3.org/TR/rdf12-concepts/#dfn-base-direction)
#[cfg(feature = "rdf-12")]
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum BaseDirection {
    /// The initial text direction is set to left-to-right.
    Ltr,
    /// The initial text direction is set to right-to-left.
    Rtl,
}
1124
/// Writes the direction as its lowercase keyword: `ltr` or `rtl`.
#[cfg(feature = "rdf-12")]
impl fmt::Display for BaseDirection {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Ltr => "ltr",
            Self::Rtl => "rtl",
        })
    }
}
1134
1135impl<'a> From<&'a Literal> for LiteralRef<'a> {
1136    #[inline]
1137    fn from(node: &'a Literal) -> Self {
1138        node.as_ref()
1139    }
1140}
1141
1142impl<'a> From<LiteralRef<'a>> for Literal {
1143    #[inline]
1144    fn from(node: LiteralRef<'a>) -> Self {
1145        node.into_owned()
1146    }
1147}
1148
1149impl<'a> From<&'a str> for LiteralRef<'a> {
1150    #[inline]
1151    fn from(value: &'a str) -> Self {
1152        LiteralRef(LiteralRefContent::String(value))
1153    }
1154}
1155
1156impl PartialEq<Literal> for LiteralRef<'_> {
1157    #[inline]
1158    fn eq(&self, other: &Literal) -> bool {
1159        *self == other.as_ref()
1160    }
1161}
1162
1163impl PartialEq<LiteralRef<'_>> for Literal {
1164    #[inline]
1165    fn eq(&self, other: &LiteralRef<'_>) -> bool {
1166        self.as_ref() == *other
1167    }
1168}
1169
1170// Implement standard From traits
1171impl<'a> From<&'a str> for Literal {
1172    #[inline]
1173    fn from(value: &'a str) -> Self {
1174        Self(LiteralContent::String(value.into()))
1175    }
1176}
1177
1178impl From<String> for Literal {
1179    #[inline]
1180    fn from(value: String) -> Self {
1181        Self(LiteralContent::String(value))
1182    }
1183}
1184
1185impl<'a> From<Cow<'a, str>> for Literal {
1186    #[inline]
1187    fn from(value: Cow<'a, str>) -> Self {
1188        Self(LiteralContent::String(value.into()))
1189    }
1190}
1191
1192impl From<bool> for Literal {
1193    #[inline]
1194    fn from(value: bool) -> Self {
1195        Self(LiteralContent::TypedLiteral {
1196            value: value.to_string(),
1197            datatype: xsd::BOOLEAN.clone(),
1198        })
1199    }
1200}
1201
1202impl From<i128> for Literal {
1203    #[inline]
1204    fn from(value: i128) -> Self {
1205        Self(LiteralContent::TypedLiteral {
1206            value: value.to_string(),
1207            datatype: xsd::INTEGER.clone(),
1208        })
1209    }
1210}
1211
1212impl From<i64> for Literal {
1213    #[inline]
1214    fn from(value: i64) -> Self {
1215        Self(LiteralContent::TypedLiteral {
1216            value: value.to_string(),
1217            datatype: xsd::INTEGER.clone(),
1218        })
1219    }
1220}
1221
1222impl From<i32> for Literal {
1223    #[inline]
1224    fn from(value: i32) -> Self {
1225        Self(LiteralContent::TypedLiteral {
1226            value: value.to_string(),
1227            datatype: xsd::INTEGER.clone(),
1228        })
1229    }
1230}
1231
1232impl From<i16> for Literal {
1233    #[inline]
1234    fn from(value: i16) -> Self {
1235        Self(LiteralContent::TypedLiteral {
1236            value: value.to_string(),
1237            datatype: xsd::INTEGER.clone(),
1238        })
1239    }
1240}
1241
1242impl From<u64> for Literal {
1243    #[inline]
1244    fn from(value: u64) -> Self {
1245        Self(LiteralContent::TypedLiteral {
1246            value: value.to_string(),
1247            datatype: xsd::INTEGER.clone(),
1248        })
1249    }
1250}
1251
1252impl From<u32> for Literal {
1253    #[inline]
1254    fn from(value: u32) -> Self {
1255        Self(LiteralContent::TypedLiteral {
1256            value: value.to_string(),
1257            datatype: xsd::INTEGER.clone(),
1258        })
1259    }
1260}
1261
1262impl From<u16> for Literal {
1263    #[inline]
1264    fn from(value: u16) -> Self {
1265        Self(LiteralContent::TypedLiteral {
1266            value: value.to_string(),
1267            datatype: xsd::INTEGER.clone(),
1268        })
1269    }
1270}
1271
1272impl From<f32> for Literal {
1273    #[inline]
1274    fn from(value: f32) -> Self {
1275        Self(LiteralContent::TypedLiteral {
1276            value: if value == f32::INFINITY {
1277                "INF".to_owned()
1278            } else if value == f32::NEG_INFINITY {
1279                "-INF".to_owned()
1280            } else {
1281                value.to_string()
1282            },
1283            datatype: xsd::FLOAT.clone(),
1284        })
1285    }
1286}
1287
1288impl From<f64> for Literal {
1289    #[inline]
1290    fn from(value: f64) -> Self {
1291        Self(LiteralContent::TypedLiteral {
1292            value: if value == f64::INFINITY {
1293                "INF".to_owned()
1294            } else if value == f64::NEG_INFINITY {
1295                "-INF".to_owned()
1296            } else {
1297                value.to_string()
1298            },
1299            datatype: xsd::DOUBLE.clone(),
1300        })
1301    }
1302}
1303
1304/// Common XSD datatypes as constants and convenience functions
1305pub mod xsd_literals {
1306    use super::*;
1307    use crate::vocab::xsd;
1308
1309    // Convenience functions for creating typed literals
1310
1311    /// Creates a boolean literal
1312    pub fn boolean_literal(value: bool) -> Literal {
1313        Literal::new_typed(value.to_string(), xsd::BOOLEAN.clone())
1314    }
1315
1316    /// Creates an integer literal
1317    pub fn integer_literal(value: i64) -> Literal {
1318        Literal::new_typed(value.to_string(), xsd::INTEGER.clone())
1319    }
1320
1321    /// Creates a decimal literal
1322    pub fn decimal_literal(value: f64) -> Literal {
1323        Literal::new_typed(value.to_string(), xsd::DECIMAL.clone())
1324    }
1325
1326    /// Creates a double literal
1327    pub fn double_literal(value: f64) -> Literal {
1328        Literal::new_typed(value.to_string(), xsd::DOUBLE.clone())
1329    }
1330
1331    /// Creates a string literal
1332    pub fn string_literal(value: &str) -> Literal {
1333        Literal::new_typed(value, xsd::STRING.clone())
1334    }
1335}
1336
#[cfg(test)]
mod tests {
    use super::*;

    /// A simple literal and an explicit `xsd:string` literal compare equal in
    /// every owned/borrowed combination.
    #[test]
    fn test_simple_literal_equality() {
        assert_eq!(
            Literal::new_simple_literal("foo"),
            Literal::new_typed_literal("foo", xsd::STRING.clone())
        );
        assert_eq!(
            Literal::new_simple_literal("foo"),
            LiteralRef::new_typed_literal("foo", xsd::STRING.as_ref())
        );
        assert_eq!(
            LiteralRef::new_simple_literal("foo"),
            Literal::new_typed_literal("foo", xsd::STRING.clone())
        );
        assert_eq!(
            LiteralRef::new_simple_literal("foo"),
            LiteralRef::new_typed_literal("foo", xsd::STRING.as_ref())
        );
    }

    /// Non-finite floats use the XSD spellings `INF`, `-INF` and `NaN`.
    #[test]
    fn test_float_format() {
        assert_eq!("INF", Literal::from(f32::INFINITY).value());
        assert_eq!("INF", Literal::from(f64::INFINITY).value());
        assert_eq!("-INF", Literal::from(f32::NEG_INFINITY).value());
        assert_eq!("-INF", Literal::from(f64::NEG_INFINITY).value());
        assert_eq!("NaN", Literal::from(f32::NAN).value());
        assert_eq!("NaN", Literal::from(f64::NAN).value());
    }

    /// A literal built with `Literal::new` is plain, untyped and untagged.
    #[test]
    fn test_plain_literal() {
        let literal = Literal::new("Hello");
        assert_eq!(literal.value(), "Hello");
        #[allow(deprecated)]
        {
            assert!(literal.is_plain());
        }
        assert!(!literal.is_lang_string());
        assert!(!literal.is_typed());
        assert_eq!(format!("{literal}"), "\"Hello\"");
    }

    /// A language-tagged literal exposes its tag and prints with `@lang`.
    #[test]
    fn test_lang_literal() {
        let literal = Literal::new_lang("Hello", "en").unwrap();
        assert_eq!(literal.value(), "Hello");
        assert_eq!(literal.language(), Some("en"));
        #[allow(deprecated)]
        {
            assert!(literal.is_plain());
        }
        assert!(literal.is_lang_string());
        assert!(!literal.is_typed());
        assert_eq!(format!("{literal}"), "\"Hello\"@en");
    }

    /// A typed literal exposes its datatype and prints with `^^<iri>`.
    #[test]
    fn test_typed_literal() {
        let literal = Literal::new_typed("42", xsd::INTEGER.clone());
        assert_eq!(literal.value(), "42");
        assert_eq!(
            literal.datatype().as_str(),
            "http://www.w3.org/2001/XMLSchema#integer"
        );
        #[allow(deprecated)]
        {
            assert!(!literal.is_plain());
        }
        assert!(!literal.is_lang_string());
        assert!(literal.is_typed());
        assert_eq!(
            format!("{literal}"),
            "\"42\"^^<http://www.w3.org/2001/XMLSchema#integer>"
        );
    }

    /// A borrowed literal round-trips to an owned one with the same value.
    #[test]
    fn test_literal_ref() {
        let literal_ref = LiteralRef::new("test");
        assert_eq!(literal_ref.value(), "test");

        let owned = literal_ref.to_owned();
        assert_eq!(owned.value(), "test");
    }

    /// Booleans can be read back from typed and from plain literals.
    #[test]
    fn test_boolean_extraction() {
        let bool_literal = xsd_literals::boolean_literal(true);
        assert!(bool_literal.is_boolean());
        assert_eq!(bool_literal.as_bool(), Some(true));

        let false_literal = Literal::new_typed("false", xsd::BOOLEAN.clone());
        assert_eq!(false_literal.as_bool(), Some(false));

        // Plain string representations are accepted as well.
        let true_str = Literal::new("true");
        assert_eq!(true_str.as_bool(), Some(true));

        let false_str = Literal::new("0");
        assert_eq!(false_str.as_bool(), Some(false));
    }

    /// Numeric accessors work for integer, decimal and untyped numeric literals.
    #[test]
    fn test_numeric_extraction() {
        let int_literal = xsd_literals::integer_literal(42);
        assert!(int_literal.is_numeric());
        assert_eq!(int_literal.as_i64(), Some(42));
        assert_eq!(int_literal.as_i32(), Some(42));
        assert_eq!(int_literal.as_f64(), Some(42.0));

        let decimal_literal = xsd_literals::decimal_literal(3.25);
        assert!(decimal_literal.is_numeric());
        assert_eq!(decimal_literal.as_f64(), Some(3.25));
        assert_eq!(decimal_literal.as_f32(), Some(3.25_f32));

        // Untyped numeric strings are also treated as numeric.
        let untyped_num = Literal::new("123");
        assert!(untyped_num.is_numeric());
        assert_eq!(untyped_num.as_i64(), Some(123));
    }

    /// `canonical_form` normalizes booleans and decimals and keeps the datatype.
    #[test]
    fn test_canonical_form() {
        // Boolean canonicalization
        let bool_literal = Literal::new_typed("True", xsd::BOOLEAN.clone());
        let canonical = bool_literal.canonical_form();
        assert_eq!(canonical.value(), "true");

        // Integer canonicalization: surrounding whitespace is not asserted to
        // be trimmed here; only the datatype is checked.
        let int_literal = Literal::new_typed("  42  ", xsd::INTEGER.clone());
        let canonical = int_literal.canonical_form();
        assert_eq!(
            canonical.datatype().as_str(),
            "http://www.w3.org/2001/XMLSchema#integer"
        );

        // Decimal canonicalization removes trailing zeros.
        let dec_literal = Literal::new_typed("3.140", xsd::DECIMAL.clone());
        let canonical = dec_literal.canonical_form();
        assert_eq!(canonical.value(), "3.14");
    }

    /// The `xsd_literals` helpers produce the expected lexical forms and datatypes.
    #[test]
    fn test_xsd_convenience_functions() {
        // Lexical forms
        assert_eq!(xsd_literals::boolean_literal(true).value(), "true");
        assert_eq!(xsd_literals::integer_literal(123).value(), "123");
        assert_eq!(xsd_literals::decimal_literal(3.25).value(), "3.25");
        assert_eq!(xsd_literals::double_literal(2.71).value(), "2.71");
        assert_eq!(xsd_literals::string_literal("hello").value(), "hello");

        // Datatype assignments
        assert_eq!(
            xsd_literals::boolean_literal(true).datatype().as_str(),
            "http://www.w3.org/2001/XMLSchema#boolean"
        );
        assert_eq!(
            xsd_literals::integer_literal(123).datatype().as_str(),
            "http://www.w3.org/2001/XMLSchema#integer"
        );
    }

    /// `is_numeric` is true for numeric datatypes and false for string/boolean.
    #[test]
    fn test_numeric_type_detection() {
        // Numeric datatypes
        let int_lit = Literal::new_typed("42", xsd::INTEGER.clone());
        assert!(int_lit.is_numeric());

        let float_lit = Literal::new_typed("3.14", xsd::FLOAT.clone());
        assert!(float_lit.is_numeric());

        let double_lit = Literal::new_typed("2.71", xsd::DOUBLE.clone());
        assert!(double_lit.is_numeric());

        // Non-numeric datatypes
        let string_lit = Literal::new_typed("hello", xsd::STRING.clone());
        assert!(!string_lit.is_numeric());

        let bool_lit = Literal::new_typed("true", xsd::BOOLEAN.clone());
        assert!(!bool_lit.is_numeric());
    }
}