Skip to main content

xsd_schema/types/
facets.rs

//! XSD constraining facets
//!
//! This module implements the XSD facet system for constraining simple types.
//! Facets can restrict length, numeric range, pattern matching, enumeration, and
//! whitespace handling.
//!
//! ## XSD Facet Categories
//!
//! - **Length facets**: length, minLength, maxLength (for string, binary, list types)
//! - **Numeric precision facets**: totalDigits, fractionDigits (for decimal types)
//! - **Bound facets**: minInclusive, maxInclusive, minExclusive, maxExclusive
//! - **String facets**: pattern, enumeration, whiteSpace
//! - **XSD 1.1 facets**: explicitTimezone, assertion
//!
//! ## Facet Inheritance
//!
//! When deriving a simple type by restriction:
//! - Derived facets must be more restrictive than base facets
//! - Fixed facets cannot be overridden with different values
//! - Patterns are cumulative: patterns declared in the same restriction step are
//!   ORed, and the resulting steps are ANDed together
//! - Enumerations must be subsets of base enumerations
21
22use crate::error::{FacetError, FacetResult};
23use crate::namespace::context::NamespaceContextSnapshot;
24use crate::parser::location::SourceRef;
25use crate::regex_convert::lenient_ms_preprocess;
26#[cfg(feature = "xsd11")]
27use crate::regex_convert::rewrite_xsd10_category_escapes;
28use crate::regex_convert::validate_xml_pattern_syntax;
29#[cfg(not(feature = "xsd11"))]
30use crate::regex_convert::{convert_xml_pattern, ConvertOptions};
31use crate::schema::model::{RegexCompat, XsdVersion};
32#[cfg(not(feature = "xsd11"))]
33use regex::Regex;
34use std::collections::HashSet;
35
36#[cfg(feature = "xsd11")]
37use std::sync::Arc;
38
39use super::XmlTypeCode;
40
/// Fixed vs default facet values
///
/// Carried by most facet structs in this module as their `fixed` field.
/// The default is [`FacetFixed::Default`] (not fixed).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FacetFixed {
    /// Value can be further restricted
    #[default]
    Default,
    /// Value cannot be changed by derived types
    Fixed,
}
50
/// Whitespace handling mode
///
/// Value of the whitespace facet (see [`WhitespaceFacet`]). The default is
/// [`WhitespaceMode::Collapse`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WhitespaceMode {
    /// Preserve all whitespace
    Preserve,
    /// Replace tabs/newlines with spaces
    Replace,
    /// Collapse consecutive whitespace to single space, trim
    #[default]
    Collapse,
}
62
/// Length facet (exact length constraint)
#[derive(Debug, Clone)]
pub struct LengthFacet {
    /// The exact length required
    pub value: u64,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
70
/// MinLength facet
#[derive(Debug, Clone)]
pub struct MinLengthFacet {
    /// The minimum length allowed
    pub value: u64,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
78
/// MaxLength facet
#[derive(Debug, Clone)]
pub struct MaxLengthFacet {
    /// The maximum length allowed
    pub value: u64,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
86
/// Pattern facet (regex constraint)
///
/// Stores the XSD pattern text together with an optional compiled matcher.
/// The matcher type is selected by the `xsd11` feature: `regex::Regex`
/// without it, `Arc<regexml::Regex>` with it.
#[derive(Debug, Clone)]
pub struct PatternFacet {
    /// The pattern string (XSD regex syntax)
    pub value: String,
    /// Compiled regex for efficient matching (`None` until compiled)
    #[cfg(not(feature = "xsd11"))]
    compiled: Option<Regex>,
    /// Compiled regex for efficient matching (`None` until compiled)
    #[cfg(feature = "xsd11")]
    compiled: Option<Arc<regexml::Regex>>,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
99
100impl PatternFacet {
101    /// Create a new pattern facet from an XSD pattern string.
102    ///
103    /// The pattern is validated and compiled using the appropriate backend
104    /// (XSD 1.0: `regex` via `convert_xml_pattern`; XSD 1.1: `regexml` after
105    /// a `\p{X}` rewrite if `xsd_version == V1_0`). Returns an error if the
106    /// pattern is invalid.
107    ///
108    /// `xsd_version` controls the `\p{X}` category escape semantics: under
109    /// `V1_0` recognized general-category names are expanded to Unicode 3.0
110    /// ranges; `V1_1` passes them through to the backend unchanged.
111    ///
112    /// `regex_compat` controls grammar leniency: `Strict` enforces XSD
113    /// Part 2 §F/§G; `LenientMs` first applies [`lenient_ms_preprocess`]
114    /// to drop start/end anchors and `(?#…)` comments common in
115    /// .NET-authored schemas.
116    pub fn new(
117        value: String,
118        source: Option<SourceRef>,
119        xsd_version: XsdVersion,
120        regex_compat: RegexCompat,
121    ) -> FacetResult<Self> {
122        let mut facet = Self::new_unchecked(value, source);
123        facet.compile(xsd_version, regex_compat)?;
124        Ok(facet)
125    }
126
127    /// Create a pattern facet without compiling (for deferred compilation)
128    pub fn new_unchecked(value: String, source: Option<SourceRef>) -> Self {
129        Self {
130            value,
131            compiled: None,
132            source,
133        }
134    }
135
136    /// Compile the pattern if not already compiled
137    #[cfg(not(feature = "xsd11"))]
138    pub fn compile(
139        &mut self,
140        xsd_version: XsdVersion,
141        regex_compat: RegexCompat,
142    ) -> FacetResult<()> {
143        if self.compiled.is_none() {
144            let effective: std::borrow::Cow<'_, str> = match regex_compat {
145                RegexCompat::Strict => std::borrow::Cow::Borrowed(self.value.as_str()),
146                RegexCompat::LenientMs => lenient_ms_preprocess(&self.value),
147            };
148            // The XSD 1.0 hyphen-rule check is part of the strict §F grammar
149            // gate; skip it under LenientMs so the engine alone decides.
150            if xsd_version == XsdVersion::V1_0 && regex_compat == RegexCompat::Strict {
151                validate_xml_pattern_syntax(&effective).map_err(|message| {
152                    FacetError::InvalidPattern {
153                        pattern: self.value.clone(),
154                        message,
155                    }
156                })?;
157            }
158            let opts = match xsd_version {
159                XsdVersion::V1_0 => ConvertOptions::xsd_v1_0(),
160                XsdVersion::V1_1 => ConvertOptions::xsd(),
161            };
162            let rust_pattern = convert_xml_pattern(&effective, opts);
163            let compiled = Regex::new(&rust_pattern).map_err(|e| FacetError::InvalidPattern {
164                pattern: self.value.clone(),
165                message: e.to_string(),
166            })?;
167            self.compiled = Some(compiled);
168        }
169        Ok(())
170    }
171
    /// Compile the pattern if not already compiled (`regexml` backend).
    ///
    /// Stages, in order:
    /// 1. Under `LenientMs`, preprocess the pattern with
    ///    [`lenient_ms_preprocess`]; under `Strict` use it verbatim.
    /// 2. Under `V1_0` + `Strict`, run `validate_xml_pattern_syntax`.
    /// 3. Validate with `regexml::Regex::xsd` (`V1_0`: only when strict;
    ///    `V1_1`: via `validate_xsd11_pattern_with_block_fallback`).
    /// 4. Under `V1_0`, rewrite `\p{X}` escapes with
    ///    `rewrite_xsd10_category_escapes`.
    /// 5. Wrap the result in `^(?:…)$` and compile it with
    ///    `regexml::Regex::xpath`, caching the matcher in an `Arc`.
    ///
    /// # Errors
    ///
    /// Returns `FacetError::InvalidPattern` when any validation or the final
    /// compilation fails.
    #[cfg(feature = "xsd11")]
    pub fn compile(
        &mut self,
        xsd_version: XsdVersion,
        regex_compat: RegexCompat,
    ) -> FacetResult<()> {
        // A cached matcher means there is nothing left to do.
        if self.compiled.is_none() {
            let strict = regex_compat == RegexCompat::Strict;
            // Apply MS dialect preprocess (closed list) before grammar
            // validation when LenientMs is on. Owned to keep one stable
            // backing string across the two-step validate+rewrite below.
            let effective: String = if strict {
                self.value.clone()
            } else {
                lenient_ms_preprocess(&self.value).into_owned()
            };
            // Strict XSD 1.0 hyphen-rule grammar gate. Skipped under
            // LenientMs so the engine alone decides what is well-formed.
            if xsd_version == XsdVersion::V1_0 && strict {
                validate_xml_pattern_syntax(&effective).map_err(|message| {
                    FacetError::InvalidPattern {
                        pattern: self.value.clone(),
                        message,
                    }
                })?;
            }
            // Strict XSD §F/§G grammar gate via regexml `xsd()`. Skipped
            // under LenientMs — the runtime matcher uses regexml `xpath()`
            // (next step), which natively accepts XPath-only constructs
            // like `^`/`$` outside char class, backrefs `\1`, non-capturing
            // `(?:...)`, and reluctant quantifiers `*?` that are valid
            // .NET regex idioms. For XSD 1.1, an unrecognized `\p{IsX}`
            // block name is treated as matching every character (W3C bug
            // 13670 / XSD 1.1 Datatypes §G.4.2.3); the rewrite remains in
            // place under both modes because it is a spec rule, not a
            // grammar choice (regexml 0.2 does not yet honour
            // `allow_unknown_block_names`).
            //
            // NOTE(review): the V1_1 arm below calls
            // `validate_xsd11_pattern_with_block_fallback` regardless of
            // `strict`, and that helper validates with `regexml::Regex::xsd`
            // and returns an error for anything other than an unknown block
            // name. So for V1_1 the strict gate is NOT skipped under
            // LenientMs — confirm whether that is intended.
            // NOTE(review): errors from that helper carry `effective` (the
            // possibly preprocessed text) as `pattern`, whereas every other
            // error path here reports `self.value` — confirm.
            let xsd_validated: std::borrow::Cow<'_, str> = match xsd_version {
                XsdVersion::V1_0 => {
                    if strict {
                        // Validation only: the parsed regex is discarded.
                        regexml::Regex::xsd(&effective, "").map_err(|e| {
                            FacetError::InvalidPattern {
                                pattern: self.value.clone(),
                                message: format!("{:?}", e),
                            }
                        })?;
                    }
                    std::borrow::Cow::Borrowed(effective.as_str())
                }
                XsdVersion::V1_1 => validate_xsd11_pattern_with_block_fallback(&effective)?,
            };
            // Under XSD 1.0 the \p{X} rewrite produces a new String; under 1.1
            // we use the (possibly block-rewritten) validated value.
            let pinned: std::borrow::Cow<'_, str> = match xsd_version {
                XsdVersion::V1_0 => {
                    std::borrow::Cow::Owned(rewrite_xsd10_category_escapes(&effective))
                }
                XsdVersion::V1_1 => xsd_validated,
            };
            // Compile with explicit anchoring for full-string matching
            let anchored = format!("^(?:{})$", pinned);
            let compiled =
                regexml::Regex::xpath(&anchored, "").map_err(|e| FacetError::InvalidPattern {
                    pattern: self.value.clone(),
                    message: format!("{:?}", e),
                })?;
            self.compiled = Some(Arc::new(compiled));
        }
        Ok(())
    }
243
244    /// Test if a value matches this pattern
245    #[cfg(not(feature = "xsd11"))]
246    pub fn matches(&self, value: &str) -> bool {
247        match &self.compiled {
248            Some(regex) => regex.is_match(value),
249            None => {
250                // Defensive fallback: compile on-the-fly using XSD 1.1 defaults.
251                // Reached only if a facet was never compiled via `compile_patterns`.
252                if let Ok(rust_pattern) = std::panic::catch_unwind(|| {
253                    convert_xml_pattern(&self.value, ConvertOptions::xsd())
254                }) {
255                    if let Ok(regex) = Regex::new(&rust_pattern) {
256                        return regex.is_match(value);
257                    }
258                }
259                false
260            }
261        }
262    }
263
264    /// Test if a value matches this pattern
265    #[cfg(feature = "xsd11")]
266    pub fn matches(&self, value: &str) -> bool {
267        match &self.compiled {
268            Some(regex) => regex.is_match(value),
269            None => {
270                // Defensive fallback: validate and compile on-the-fly with XSD 1.1
271                // defaults. Reached only if a facet was never compiled via
272                // `compile_patterns`.
273                if let Ok(rewritten) = validate_xsd11_pattern_with_block_fallback(&self.value) {
274                    let anchored = format!("^(?:{})$", rewritten);
275                    if let Ok(regex) = regexml::Regex::xpath(&anchored, "") {
276                        return regex.is_match(value);
277                    }
278                }
279                false
280            }
281        }
282    }
283}
284
/// Validate an XSD 1.1 pattern with regexml's strict XSD parser, tolerating
/// unknown `\p{IsX}` / `\P{IsX}` block names.
///
/// Each time the parser reports an unknown Unicode block, every token for
/// that block is rewritten to a match-everything expression (W3C bug 13670 /
/// XSD 1.1 Datatypes §G.4.2.3) and validation is retried, up to 16 times.
///
/// Returns the (possibly rewritten) pattern. Returns
/// `FacetError::InvalidPattern` when the parser reports anything other than
/// an unknown block name, when the reported block cannot be located in the
/// pattern, or when the pattern is still invalid after the final rewrite.
#[cfg(feature = "xsd11")]
fn validate_xsd11_pattern_with_block_fallback(
    value: &str,
) -> FacetResult<std::borrow::Cow<'_, str>> {
    use std::borrow::Cow;

    // Marker that precedes the block name in the parser's debug output.
    const PREFIX: &str = "Unknown Unicode block: ";
    const MAX_REWRITES: usize = 16;

    // All failures report the caller's original pattern text.
    let invalid = |message: String| FacetError::InvalidPattern {
        pattern: value.to_string(),
        message,
    };

    let mut current: Cow<'_, str> = Cow::Borrowed(value);
    for _ in 0..MAX_REWRITES {
        let err = match regexml::Regex::xsd(&current, "") {
            Ok(_) => return Ok(current),
            Err(e) => format!("{:?}", e),
        };

        // Pull the offending block name out of the error text, if present.
        let block = err
            .find(PREFIX)
            .map(|start| {
                let after = &err[start + PREFIX.len()..];
                let end = after
                    .find(|c: char| !c.is_alphanumeric() && c != '-' && c != '_' && c != ' ')
                    .unwrap_or(after.len());
                after[..end].trim()
            })
            .filter(|name| !name.is_empty());

        let rewritten = block.and_then(|name| rewrite_pattern_isblock_token(&current, name));
        match rewritten {
            Some(next) => current = Cow::Owned(next),
            None => return Err(invalid(err)),
        }
    }

    // Loop bound exceeded; surface the final error if any.
    match regexml::Regex::xsd(&current, "") {
        Ok(_) => Ok(current),
        Err(e) => Err(invalid(format!("{:?}", e))),
    }
}
337
338/// Rewrite every `\p{Is<block>}` / `\P{Is<block>}` token in `pattern` to a
339/// match-everything expression. Uses `[\s\S]` at atom position and `\s\S`
340/// inside a character class so the rewritten token is structurally valid in
341/// either context. Returns `None` if no rewrite happened.
342#[cfg(feature = "xsd11")]
fn rewrite_pattern_isblock_token(pattern: &str, block_name: &str) -> Option<String> {
    // Replaces every `\p{Is<block_name>}` / `\P{Is<block_name>}` token in
    // `pattern` with a match-everything expression: `[\s\S]` at atom
    // position, `\s\S` inside a character class. Returns `None` when no
    // token was rewritten.
    //
    // The scan walks a `&str` cursor and tests tokens with `starts_with`, so
    // it never slices at a byte offset that falls inside a multi-byte
    // character (fixed-width slicing after a backslash could panic on
    // patterns such as `\dabcdé\p{IsX}`).
    let lower_token = format!("\\p{{Is{}}}", block_name);
    let upper_token = format!("\\P{{Is{}}}", block_name);

    // Cheap pre-check: skip the scan when neither spelling occurs at all.
    if !pattern.contains(&lower_token) && !pattern.contains(&upper_token) {
        return None;
    }
    // Both spellings differ only in the case of one ASCII letter.
    let token_len = lower_token.len();

    let mut result = String::with_capacity(pattern.len());
    let mut in_class = false;
    let mut found = false;
    let mut rest = pattern;

    while let Some(ch) = rest.chars().next() {
        if ch == '\\' {
            if rest.starts_with(&lower_token) || rest.starts_with(&upper_token) {
                result.push_str(if in_class { "\\s\\S" } else { "[\\s\\S]" });
                rest = &rest[token_len..];
                found = true;
                continue;
            }
            // Any other escape: copy the backslash together with the escaped
            // character so that `\[`, `\]` do not toggle `in_class` and an
            // escaped backslash cannot start a token. A trailing lone
            // backslash is copied as-is.
            let escaped_len = rest[1..].chars().next().map_or(0, char::len_utf8);
            result.push_str(&rest[..1 + escaped_len]);
            rest = &rest[1 + escaped_len..];
            continue;
        }

        match ch {
            '[' => in_class = true,
            ']' => in_class = false,
            _ => {}
        }
        result.push(ch);
        rest = &rest[ch.len_utf8()..];
    }

    // The pre-check can pass without a real token (e.g. `\\p{IsX}`, an
    // escaped backslash followed by literal text), so report that as `None`.
    if found {
        Some(result)
    } else {
        None
    }
}
409
/// Enumeration facet (allowed values)
#[derive(Debug, Clone)]
pub struct EnumerationFacet {
    /// Set of allowed values (as strings)
    pub values: HashSet<String>,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
417
/// Whitespace facet
#[derive(Debug, Clone)]
pub struct WhitespaceFacet {
    /// The whitespace handling mode to apply
    pub value: WhitespaceMode,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
425
/// MinInclusive facet (value >= bound)
#[derive(Debug, Clone)]
pub struct MinInclusiveFacet {
    /// The bound as a string (type-specific interpretation during validation)
    pub value: String,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
434
/// MaxInclusive facet (value <= bound)
#[derive(Debug, Clone)]
pub struct MaxInclusiveFacet {
    /// The bound, kept as a string
    pub value: String,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
442
/// MinExclusive facet (value > bound)
#[derive(Debug, Clone)]
pub struct MinExclusiveFacet {
    /// The bound, kept as a string
    pub value: String,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
450
/// MaxExclusive facet (value < bound)
#[derive(Debug, Clone)]
pub struct MaxExclusiveFacet {
    /// The bound, kept as a string
    pub value: String,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
458
/// TotalDigits facet (for decimal types)
#[derive(Debug, Clone)]
pub struct TotalDigitsFacet {
    /// The totalDigits value
    pub value: u32,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
466
/// FractionDigits facet (decimal places)
#[derive(Debug, Clone)]
pub struct FractionDigitsFacet {
    /// The fractionDigits value
    pub value: u32,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
474
/// XSD 1.1: Assertion facet (XPath constraint on simple type values)
///
/// Unlike most facets in this module it carries no `fixed` flag.
#[derive(Debug, Clone)]
pub struct AssertionFacet {
    /// XPath 2.0 test expression
    pub test: String,
    /// Raw xpathDefaultNamespace attribute (resolved at evaluation time)
    pub xpath_default_namespace: Option<String>,
    /// Namespace bindings snapshot at parse time (for prefix resolution in XPath)
    pub ns_snapshot: NamespaceContextSnapshot,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
486
/// XSD 1.1: ExplicitTimezone facet
///
/// The three variants correspond to the `required`, `prohibited` and
/// `optional` values of the XSD 1.1 `explicitTimezone` facet.
///
/// TODO: XSD 1.1 - Implement explicitTimezone constraint
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExplicitTimezone {
    /// `explicitTimezone="required"`
    Required,
    /// `explicitTimezone="prohibited"`
    Prohibited,
    /// `explicitTimezone="optional"`
    Optional,
}
495
/// XSD 1.1: ExplicitTimezone facet data
#[derive(Debug, Clone)]
pub struct ExplicitTimezoneFacet {
    /// The explicitTimezone setting
    pub value: ExplicitTimezone,
    /// Whether derived types may change `value`
    pub fixed: FacetFixed,
    /// Optional `SourceRef` for this facet (`None` when not recorded)
    pub source: Option<SourceRef>,
}
503
/// Complete set of facets for a simple type
///
/// A FacetSet collects all constraining facets that apply to a simple type.
/// Facets are accumulated during type derivation. Every single-valued facet
/// is an `Option` (`None` = not set); `patterns` and `assertions` are lists.
/// `FacetSet::default()` is the empty set.
#[derive(Debug, Clone, Default)]
pub struct FacetSet {
    // String length facets
    pub length: Option<LengthFacet>,
    pub min_length: Option<MinLengthFacet>,
    pub max_length: Option<MaxLengthFacet>,

    // Pattern facets grouped by derivation step.
    //
    // Per XSD Datatypes Part 2 §4.3.4 (and the equivalent §A.2 prose),
    // multiple `<xs:pattern>` facets in a single restriction step combine
    // as alternation (logical OR), while patterns inherited from earlier
    // derivation steps further restrict the value (logical AND).
    //
    // Outer Vec = AND across derivation steps; inner Vec = OR within a step.
    // Empty inner Vecs are tolerated: `is_empty` and `check_patterns` skip them.
    pub patterns: Vec<Vec<PatternFacet>>,

    // Enumeration (allowed values). The `Option` is only the presence flag;
    // multi-valued semantics live inside `EnumerationFacet::values` (HashSet),
    // so enumeration is exempt from st-props-correct.1 "no duplicate facet" (§3.16.2).
    pub enumeration: Option<EnumerationFacet>,

    // Whitespace handling
    pub whitespace: Option<WhitespaceFacet>,

    // Numeric range facets
    pub min_inclusive: Option<MinInclusiveFacet>,
    pub max_inclusive: Option<MaxInclusiveFacet>,
    pub min_exclusive: Option<MinExclusiveFacet>,
    pub max_exclusive: Option<MaxExclusiveFacet>,

    // Decimal precision facets
    pub total_digits: Option<TotalDigitsFacet>,
    pub fraction_digits: Option<FractionDigitsFacet>,

    // XSD 1.1 facets
    // TODO: XSD 1.1 - These are parsed but not enforced in 1.0 mode
    pub assertions: Vec<AssertionFacet>,
    pub explicit_timezone: Option<ExplicitTimezoneFacet>,
}
548
549impl FacetSet {
550    /// Create a new empty facet set
551    pub fn new() -> Self {
552        Self::default()
553    }
554
555    /// Check if the facet set is empty (no facets defined)
556    pub fn is_empty(&self) -> bool {
557        self.length.is_none()
558            && self.min_length.is_none()
559            && self.max_length.is_none()
560            && self.patterns.iter().all(|step| step.is_empty())
561            && self.enumeration.is_none()
562            && self.whitespace.is_none()
563            && self.min_inclusive.is_none()
564            && self.max_inclusive.is_none()
565            && self.min_exclusive.is_none()
566            && self.max_exclusive.is_none()
567            && self.total_digits.is_none()
568            && self.fraction_digits.is_none()
569            && self.assertions.is_empty()
570            && self.explicit_timezone.is_none()
571    }
572
573    /// Set length facet
574    pub fn set_length(&mut self, value: u64, fixed: FacetFixed, source: Option<SourceRef>) {
575        self.length = Some(LengthFacet {
576            value,
577            fixed,
578            source,
579        });
580    }
581
582    /// Set minLength facet
583    pub fn set_min_length(&mut self, value: u64, fixed: FacetFixed, source: Option<SourceRef>) {
584        self.min_length = Some(MinLengthFacet {
585            value,
586            fixed,
587            source,
588        });
589    }
590
591    /// Set maxLength facet
592    pub fn set_max_length(&mut self, value: u64, fixed: FacetFixed, source: Option<SourceRef>) {
593        self.max_length = Some(MaxLengthFacet {
594            value,
595            fixed,
596            source,
597        });
598    }
599
600    /// Add a pattern facet (compiles the pattern) at the current
601    /// derivation step. Multiple consecutive `add_pattern` calls on the
602    /// same FacetSet are treated as alternatives (OR'd) within a single
603    /// step; a new step is opened by `inherit_from` / `merge_with_base`.
604    pub fn add_pattern(
605        &mut self,
606        value: String,
607        source: Option<SourceRef>,
608        xsd_version: XsdVersion,
609        regex_compat: RegexCompat,
610    ) -> FacetResult<()> {
611        let pattern = PatternFacet::new(value, source, xsd_version, regex_compat)?;
612        self.push_pattern_to_current_step(pattern);
613        Ok(())
614    }
615
616    /// Add a pattern facet without compiling (for deferred validation),
617    /// at the current derivation step. See `add_pattern` for the OR/AND
618    /// semantics across multiple calls.
619    pub fn add_pattern_unchecked(&mut self, value: String, source: Option<SourceRef>) {
620        self.push_pattern_to_current_step(PatternFacet::new_unchecked(value, source));
621    }
622
623    fn push_pattern_to_current_step(&mut self, pattern: PatternFacet) {
624        if self.patterns.is_empty() {
625            self.patterns.push(Vec::new());
626        }
627        self.patterns.last_mut().unwrap().push(pattern);
628    }
629
630    /// Compile all uncompiled patterns. Returns the first error encountered.
631    ///
632    /// `xsd_version` selects the Unicode-category semantics for `\p{X}`: V1_0
633    /// pins to Unicode 3.0; V1_1 passes through to the backend.
634    /// `regex_compat` controls grammar leniency (see [`PatternFacet::compile`]).
635    pub fn compile_patterns(
636        &mut self,
637        xsd_version: XsdVersion,
638        regex_compat: RegexCompat,
639    ) -> FacetResult<()> {
640        for step in &mut self.patterns {
641            for pattern in step {
642                pattern.compile(xsd_version, regex_compat)?;
643            }
644        }
645        Ok(())
646    }
647
648    /// Run the per-step XSD pattern check on `value`: every step must have
649    /// at least one alternative that matches (within-step OR, across-step
650    /// AND). Returns the first failing step's first pattern as error
651    /// context.
652    fn check_patterns(&self, value: &str) -> FacetResult<()> {
653        for step in &self.patterns {
654            if step.is_empty() {
655                continue;
656            }
657            if !step.iter().any(|p| p.matches(value)) {
658                return Err(FacetError::pattern(value, &step[0].value));
659            }
660        }
661        Ok(())
662    }
663
664    /// Add an enumeration value
665    pub fn add_enumeration(&mut self, value: String, source: Option<SourceRef>) {
666        let enumeration = self.enumeration.get_or_insert_with(|| EnumerationFacet {
667            values: HashSet::new(),
668            source: source.clone(),
669        });
670        enumeration.values.insert(value);
671    }
672
673    /// Set whitespace facet
674    pub fn set_whitespace(
675        &mut self,
676        value: WhitespaceMode,
677        fixed: FacetFixed,
678        source: Option<SourceRef>,
679    ) {
680        self.whitespace = Some(WhitespaceFacet {
681            value,
682            fixed,
683            source,
684        });
685    }
686
687    /// Set minInclusive facet
688    pub fn set_min_inclusive(
689        &mut self,
690        value: String,
691        fixed: FacetFixed,
692        source: Option<SourceRef>,
693    ) {
694        self.min_inclusive = Some(MinInclusiveFacet {
695            value,
696            fixed,
697            source,
698        });
699    }
700
701    /// Set maxInclusive facet
702    pub fn set_max_inclusive(
703        &mut self,
704        value: String,
705        fixed: FacetFixed,
706        source: Option<SourceRef>,
707    ) {
708        self.max_inclusive = Some(MaxInclusiveFacet {
709            value,
710            fixed,
711            source,
712        });
713    }
714
715    /// Set minExclusive facet
716    pub fn set_min_exclusive(
717        &mut self,
718        value: String,
719        fixed: FacetFixed,
720        source: Option<SourceRef>,
721    ) {
722        self.min_exclusive = Some(MinExclusiveFacet {
723            value,
724            fixed,
725            source,
726        });
727    }
728
729    /// Set maxExclusive facet
730    pub fn set_max_exclusive(
731        &mut self,
732        value: String,
733        fixed: FacetFixed,
734        source: Option<SourceRef>,
735    ) {
736        self.max_exclusive = Some(MaxExclusiveFacet {
737            value,
738            fixed,
739            source,
740        });
741    }
742
743    /// Set totalDigits facet
744    pub fn set_total_digits(&mut self, value: u32, fixed: FacetFixed, source: Option<SourceRef>) {
745        self.total_digits = Some(TotalDigitsFacet {
746            value,
747            fixed,
748            source,
749        });
750    }
751
752    /// Set fractionDigits facet
753    pub fn set_fraction_digits(
754        &mut self,
755        value: u32,
756        fixed: FacetFixed,
757        source: Option<SourceRef>,
758    ) {
759        self.fraction_digits = Some(FractionDigitsFacet {
760            value,
761            fixed,
762            source,
763        });
764    }
765
766    /// Add an assertion facet (XSD 1.1)
767    pub fn add_assertion(
768        &mut self,
769        test: String,
770        xpath_default_namespace: Option<String>,
771        ns_snapshot: NamespaceContextSnapshot,
772        source: Option<SourceRef>,
773    ) {
774        self.assertions.push(AssertionFacet {
775            test,
776            xpath_default_namespace,
777            ns_snapshot,
778            source,
779        });
780    }
781
782    /// Set explicitTimezone facet (XSD 1.1)
783    pub fn set_explicit_timezone(
784        &mut self,
785        value: ExplicitTimezone,
786        fixed: FacetFixed,
787        source: Option<SourceRef>,
788    ) {
789        self.explicit_timezone = Some(ExplicitTimezoneFacet {
790            value,
791            fixed,
792            source,
793        });
794    }
795
796    /// Merge facets from a base type (for type derivation by restriction)
797    ///
798    /// Inherited facets are only set if not already defined in this facet set.
799    /// The `fixed` attribute is preserved from the base type.
800    ///
801    /// Note: This method does not validate that derived facets are more restrictive.
802    /// Use `merge_with_base()` for full validation.
803    pub fn inherit_from(&mut self, base: &FacetSet) {
804        // String length facets
805        if self.length.is_none() {
806            self.length = base.length.clone();
807        }
808        if self.min_length.is_none() {
809            self.min_length = base.min_length.clone();
810        }
811        if self.max_length.is_none() {
812            self.max_length = base.max_length.clone();
813        }
814
815        // Each base derivation step is appended as a new outer step on
816        // self, preserving the within-step OR / across-step AND structure.
817        // (Step-level dedup vs. derived's local step would require value-set
818        // equivalence checks; skip it — repeated identical patterns are
819        // idempotent under either OR or AND, and recompilation cost is
820        // bounded by `compile_patterns` running per FacetSet at most once.)
821        for base_step in &base.patterns {
822            if !base_step.is_empty() {
823                self.patterns.push(base_step.clone());
824            }
825        }
826
827        // Whitespace
828        if self.whitespace.is_none() {
829            self.whitespace = base.whitespace.clone();
830        }
831
832        // Numeric bounds
833        if self.min_inclusive.is_none() {
834            self.min_inclusive = base.min_inclusive.clone();
835        }
836        if self.max_inclusive.is_none() {
837            self.max_inclusive = base.max_inclusive.clone();
838        }
839        if self.min_exclusive.is_none() {
840            self.min_exclusive = base.min_exclusive.clone();
841        }
842        if self.max_exclusive.is_none() {
843            self.max_exclusive = base.max_exclusive.clone();
844        }
845
846        // Decimal precision
847        if self.total_digits.is_none() {
848            self.total_digits = base.total_digits.clone();
849        }
850        if self.fraction_digits.is_none() {
851            self.fraction_digits = base.fraction_digits.clone();
852        }
853
854        // XSD 1.1 assertions are cumulative
855        for assertion in &base.assertions {
856            self.assertions.push(assertion.clone());
857        }
858
859        if self.explicit_timezone.is_none() {
860            self.explicit_timezone = base.explicit_timezone.clone();
861        }
862    }
863
864    /// Merge base type facets with derived type facets, validating derivation rules.
865    ///
866    /// This method enforces XSD derivation by restriction rules:
867    /// - Fixed facets cannot be overridden with different values
868    /// - Derived facets must be more restrictive than base facets
869    /// - Patterns are cumulative (ANDed together)
870    /// - Enumerations must be subsets of base enumerations
871    ///
872    /// Returns a new FacetSet combining base and derived facets, or an error
873    /// if the derivation rules are violated.
874    pub fn merge_with_base(&self, base: &FacetSet) -> FacetResult<FacetSet> {
875        // XSD Datatypes Part 2 §4.3.1.4 / §4.3.2.4 / §4.3.3.4 same-step rule:
876        // It is an error for both `length` and `minLength` (or `length` and
877        // `maxLength`) to be members of {facets} in the same derivation step.
878        // `self` represents this step's locally declared facets before the
879        // base merge, so this is the correct moment to detect the conflict.
880        if self.length.is_some() && self.min_length.is_some() {
881            return Err(FacetError::conflicting(
882                "length and minLength cannot both appear in the same restriction step",
883            ));
884        }
885        if self.length.is_some() && self.max_length.is_some() {
886            return Err(FacetError::conflicting(
887                "length and maxLength cannot both appear in the same restriction step",
888            ));
889        }
890
891        let mut result = self.clone();
892
893        // === Length facets ===
894        // Validate and merge length facet
895        if let Some(ref base_length) = base.length {
896            match &result.length {
897                Some(derived) => {
898                    // Fixed length cannot be changed
899                    if base_length.fixed == FacetFixed::Fixed && derived.value != base_length.value
900                    {
901                        return Err(FacetError::fixed_violation(
902                            "length",
903                            base_length.value.to_string(),
904                            derived.value.to_string(),
905                        ));
906                    }
907                }
908                None => {
909                    result.length = Some(base_length.clone());
910                }
911            }
912        }
913
914        // Validate and merge minLength facet
915        if let Some(ref base_min) = base.min_length {
916            match &result.min_length {
917                Some(derived) => {
918                    if base_min.fixed == FacetFixed::Fixed && derived.value != base_min.value {
919                        return Err(FacetError::fixed_violation(
920                            "minLength",
921                            base_min.value.to_string(),
922                            derived.value.to_string(),
923                        ));
924                    }
925                    // Derived minLength must be >= base minLength
926                    if derived.value < base_min.value {
927                        return Err(FacetError::derivation(format!(
928                            "minLength {} is less restrictive than base minLength {}",
929                            derived.value, base_min.value
930                        )));
931                    }
932                }
933                None => {
934                    result.min_length = Some(base_min.clone());
935                }
936            }
937        }
938
939        // Validate and merge maxLength facet
940        if let Some(ref base_max) = base.max_length {
941            match &result.max_length {
942                Some(derived) => {
943                    if base_max.fixed == FacetFixed::Fixed && derived.value != base_max.value {
944                        return Err(FacetError::fixed_violation(
945                            "maxLength",
946                            base_max.value.to_string(),
947                            derived.value.to_string(),
948                        ));
949                    }
950                    // Derived maxLength must be <= base maxLength
951                    if derived.value > base_max.value {
952                        return Err(FacetError::derivation(format!(
953                            "maxLength {} is less restrictive than base maxLength {}",
954                            derived.value, base_max.value
955                        )));
956                    }
957                }
958                None => {
959                    result.max_length = Some(base_max.clone());
960                }
961            }
962        }
963
964        // === Patterns ===
965        // Each base derivation step is appended as a new outer step on
966        // result. Within-step OR / across-step AND semantics survive the
967        // merge per XSD Datatypes §4.3.4.
968        for base_step in &base.patterns {
969            if !base_step.is_empty() {
970                result.patterns.push(base_step.clone());
971            }
972        }
973
974        // === Enumeration ===
975        // If base has enumeration, derived must be a subset (or not specify enumeration)
976        if let Some(ref base_enum) = base.enumeration {
977            match &result.enumeration {
978                Some(derived_enum) => {
979                    // Check that derived values are subset of base values
980                    for value in &derived_enum.values {
981                        if !base_enum.values.contains(value) {
982                            return Err(FacetError::derivation(format!(
983                                "enumeration value '{}' is not in base enumeration",
984                                value
985                            )));
986                        }
987                    }
988                }
989                None => {
990                    // Inherit base enumeration
991                    result.enumeration = Some(base_enum.clone());
992                }
993            }
994        }
995
996        // === Whitespace ===
997        if let Some(ref base_ws) = base.whitespace {
998            match &result.whitespace {
999                Some(derived) => {
1000                    if base_ws.fixed == FacetFixed::Fixed && derived.value != base_ws.value {
1001                        return Err(FacetError::fixed_violation(
1002                            "whiteSpace",
1003                            format!("{:?}", base_ws.value),
1004                            format!("{:?}", derived.value),
1005                        ));
1006                    }
1007                    // Whitespace can only become more restrictive:
1008                    // preserve -> replace -> collapse
1009                    if !is_whitespace_more_restrictive(derived.value, base_ws.value) {
1010                        return Err(FacetError::derivation(format!(
1011                            "whiteSpace {:?} is less restrictive than base {:?}",
1012                            derived.value, base_ws.value
1013                        )));
1014                    }
1015                }
1016                None => {
1017                    result.whitespace = Some(base_ws.clone());
1018                }
1019            }
1020        }
1021
1022        // === Numeric bounds ===
1023        // Note: Full numeric comparison would require parsing the values
1024        // For now, we check fixed constraints and inherit missing values.
1025        //
1026        // A derived type may switch between Inclusive and Exclusive for the same bound
1027        // (e.g., base has minInclusive, derived has minExclusive).  Per cos-st-restricts,
1028        // only the derived facet applies, so we must NOT inherit the base facet when the
1029        // derived type already supplies the complementary one.
1030        if let Some(ref base_facet) = base.min_inclusive {
1031            if let Some(ref derived) = result.min_inclusive {
1032                if base_facet.fixed == FacetFixed::Fixed && derived.value != base_facet.value {
1033                    return Err(FacetError::fixed_violation(
1034                        "minInclusive",
1035                        &base_facet.value,
1036                        &derived.value,
1037                    ));
1038                }
1039            } else if result.min_exclusive.is_none() {
1040                // Only inherit if derived hasn't replaced it with minExclusive
1041                result.min_inclusive = Some(base_facet.clone());
1042            }
1043        }
1044
1045        if let Some(ref base_facet) = base.max_inclusive {
1046            if let Some(ref derived) = result.max_inclusive {
1047                if base_facet.fixed == FacetFixed::Fixed && derived.value != base_facet.value {
1048                    return Err(FacetError::fixed_violation(
1049                        "maxInclusive",
1050                        &base_facet.value,
1051                        &derived.value,
1052                    ));
1053                }
1054            } else if result.max_exclusive.is_none() {
1055                // Only inherit if derived hasn't replaced it with maxExclusive
1056                result.max_inclusive = Some(base_facet.clone());
1057            }
1058        }
1059
1060        if let Some(ref base_facet) = base.min_exclusive {
1061            if let Some(ref derived) = result.min_exclusive {
1062                if base_facet.fixed == FacetFixed::Fixed && derived.value != base_facet.value {
1063                    return Err(FacetError::fixed_violation(
1064                        "minExclusive",
1065                        &base_facet.value,
1066                        &derived.value,
1067                    ));
1068                }
1069            } else if result.min_inclusive.is_none() {
1070                // Only inherit if derived hasn't replaced it with minInclusive
1071                result.min_exclusive = Some(base_facet.clone());
1072            }
1073        }
1074
1075        if let Some(ref base_facet) = base.max_exclusive {
1076            if let Some(ref derived) = result.max_exclusive {
1077                if base_facet.fixed == FacetFixed::Fixed && derived.value != base_facet.value {
1078                    return Err(FacetError::fixed_violation(
1079                        "maxExclusive",
1080                        &base_facet.value,
1081                        &derived.value,
1082                    ));
1083                }
1084            } else if result.max_inclusive.is_none() {
1085                // Only inherit if derived hasn't replaced it with maxInclusive
1086                result.max_exclusive = Some(base_facet.clone());
1087            }
1088        }
1089
1090        // === Digit facets ===
1091        if let Some(ref base_td) = base.total_digits {
1092            match &result.total_digits {
1093                Some(derived) => {
1094                    if base_td.fixed == FacetFixed::Fixed && derived.value != base_td.value {
1095                        return Err(FacetError::fixed_violation(
1096                            "totalDigits",
1097                            base_td.value.to_string(),
1098                            derived.value.to_string(),
1099                        ));
1100                    }
1101                    // Derived totalDigits must be <= base totalDigits
1102                    if derived.value > base_td.value {
1103                        return Err(FacetError::derivation(format!(
1104                            "totalDigits {} is less restrictive than base totalDigits {}",
1105                            derived.value, base_td.value
1106                        )));
1107                    }
1108                }
1109                None => {
1110                    result.total_digits = Some(base_td.clone());
1111                }
1112            }
1113        }
1114
1115        if let Some(ref base_fd) = base.fraction_digits {
1116            match &result.fraction_digits {
1117                Some(derived) => {
1118                    if base_fd.fixed == FacetFixed::Fixed && derived.value != base_fd.value {
1119                        return Err(FacetError::fixed_violation(
1120                            "fractionDigits",
1121                            base_fd.value.to_string(),
1122                            derived.value.to_string(),
1123                        ));
1124                    }
1125                    // Derived fractionDigits must be <= base fractionDigits
1126                    if derived.value > base_fd.value {
1127                        return Err(FacetError::derivation(format!(
1128                            "fractionDigits {} is less restrictive than base fractionDigits {}",
1129                            derived.value, base_fd.value
1130                        )));
1131                    }
1132                }
1133                None => {
1134                    result.fraction_digits = Some(base_fd.clone());
1135                }
1136            }
1137        }
1138
1139        // === XSD 1.1 facets ===
1140        // Assertions are cumulative
1141        for assertion in &base.assertions {
1142            result.assertions.push(assertion.clone());
1143        }
1144
1145        // ExplicitTimezone — §4.3.16 Valid explicitTimezone Restrictions:
1146        //   base=optional   → derived ∈ {optional, required, prohibited}
1147        //   base=required   → derived ∈ {required}
1148        //   base=prohibited → derived ∈ {prohibited}
1149        // This restriction is independent of {fixed}; fixed adds only a
1150        // stronger value-equality requirement on top.
1151        if let Some(ref base_etz) = base.explicit_timezone {
1152            if let Some(ref derived) = result.explicit_timezone {
1153                let restriction_ok = match base_etz.value {
1154                    ExplicitTimezone::Optional => true,
1155                    ExplicitTimezone::Required => derived.value == ExplicitTimezone::Required,
1156                    ExplicitTimezone::Prohibited => derived.value == ExplicitTimezone::Prohibited,
1157                };
1158                if !restriction_ok {
1159                    return Err(FacetError::derivation(format!(
1160                        "explicitTimezone {:?} is not a valid restriction of base {:?}",
1161                        derived.value, base_etz.value
1162                    )));
1163                }
1164                if base_etz.fixed == FacetFixed::Fixed && derived.value != base_etz.value {
1165                    return Err(FacetError::fixed_violation(
1166                        "explicitTimezone",
1167                        format!("{:?}", base_etz.value),
1168                        format!("{:?}", derived.value),
1169                    ));
1170                }
1171            } else {
1172                result.explicit_timezone = Some(base_etz.clone());
1173            }
1174        }
1175
1176        // === Validate conflicting facets ===
1177        result.validate_consistency()?;
1178
1179        Ok(result)
1180    }
1181
1182    /// Validate internal consistency of facets
1183    fn validate_consistency(&self) -> FacetResult<()> {
1184        // Check minLength <= maxLength
1185        if let (Some(min), Some(max)) = (&self.min_length, &self.max_length) {
1186            if min.value > max.value {
1187                return Err(FacetError::conflicting(format!(
1188                    "minLength {} is greater than maxLength {}",
1189                    min.value, max.value
1190                )));
1191            }
1192        }
1193
1194        // Check length conflicts with minLength/maxLength
1195        if let Some(len) = &self.length {
1196            if let Some(min) = &self.min_length {
1197                if len.value < min.value {
1198                    return Err(FacetError::conflicting(format!(
1199                        "length {} is less than minLength {}",
1200                        len.value, min.value
1201                    )));
1202                }
1203            }
1204            if let Some(max) = &self.max_length {
1205                if len.value > max.value {
1206                    return Err(FacetError::conflicting(format!(
1207                        "length {} is greater than maxLength {}",
1208                        len.value, max.value
1209                    )));
1210                }
1211            }
1212        }
1213
1214        // Check minInclusive <= maxInclusive (string comparison, approximate)
1215        // Note: Full validation would require parsing the numeric values
1216        if self.min_inclusive.is_some() && self.min_exclusive.is_some() {
1217            return Err(FacetError::conflicting(
1218                "cannot have both minInclusive and minExclusive",
1219            ));
1220        }
1221        if self.max_inclusive.is_some() && self.max_exclusive.is_some() {
1222            return Err(FacetError::conflicting(
1223                "cannot have both maxInclusive and maxExclusive",
1224            ));
1225        }
1226
1227        // Check fractionDigits <= totalDigits
1228        if let (Some(fd), Some(td)) = (&self.fraction_digits, &self.total_digits) {
1229            if fd.value > td.value {
1230                return Err(FacetError::conflicting(format!(
1231                    "fractionDigits {} is greater than totalDigits {}",
1232                    fd.value, td.value
1233                )));
1234            }
1235        }
1236
1237        // Check numeric bound consistency (minInclusive vs maxInclusive, etc.)
1238        // Uses decimal parsing for numeric comparison
1239        if let (Some(min_incl), Some(max_incl)) = (&self.min_inclusive, &self.max_inclusive) {
1240            if let Some(cmp) = compare_decimal_strings(&min_incl.value, &max_incl.value) {
1241                if cmp == std::cmp::Ordering::Greater {
1242                    return Err(FacetError::conflicting(format!(
1243                        "minInclusive '{}' is greater than maxInclusive '{}'",
1244                        min_incl.value, max_incl.value
1245                    )));
1246                }
1247            }
1248        }
1249        if let (Some(min_excl), Some(max_excl)) = (&self.min_exclusive, &self.max_exclusive) {
1250            if let Some(cmp) = compare_decimal_strings(&min_excl.value, &max_excl.value) {
1251                if cmp != std::cmp::Ordering::Less {
1252                    return Err(FacetError::conflicting(format!(
1253                        "minExclusive '{}' must be less than maxExclusive '{}'",
1254                        min_excl.value, max_excl.value
1255                    )));
1256                }
1257            }
1258        }
1259        if let (Some(min_incl), Some(max_excl)) = (&self.min_inclusive, &self.max_exclusive) {
1260            if let Some(cmp) = compare_decimal_strings(&min_incl.value, &max_excl.value) {
1261                if cmp != std::cmp::Ordering::Less {
1262                    return Err(FacetError::conflicting(format!(
1263                        "minInclusive '{}' must be less than maxExclusive '{}'",
1264                        min_incl.value, max_excl.value
1265                    )));
1266                }
1267            }
1268        }
1269        if let (Some(min_excl), Some(max_incl)) = (&self.min_exclusive, &self.max_inclusive) {
1270            if let Some(cmp) = compare_decimal_strings(&min_excl.value, &max_incl.value) {
1271                if cmp != std::cmp::Ordering::Less {
1272                    return Err(FacetError::conflicting(format!(
1273                        "minExclusive '{}' must be less than maxInclusive '{}'",
1274                        min_excl.value, max_incl.value
1275                    )));
1276                }
1277            }
1278        }
1279
1280        Ok(())
1281    }
1282
1283    /// Validate a string value against all applicable facets
1284    ///
1285    /// This validates length, pattern, enumeration, and whitespace facets.
1286    /// Numeric bounds and digit facets require parsed values and are not
1287    /// validated by this method.
1288    pub fn validate_string(&self, value: &str) -> FacetResult<()> {
1289        // Apply whitespace normalization for length calculation
1290        let normalized = match &self.whitespace {
1291            Some(ws) => normalize_whitespace(value, ws.value),
1292            None => value.to_string(),
1293        };
1294        let check_value = &normalized;
1295
1296        // Check length facet
1297        if let Some(ref length) = self.length {
1298            let len = check_value.chars().count() as u64;
1299            if len != length.value {
1300                return Err(FacetError::length(format!(
1301                    "value length {} does not equal required length {}",
1302                    len, length.value
1303                )));
1304            }
1305        }
1306
1307        // Check minLength facet
1308        if let Some(ref min_length) = self.min_length {
1309            let len = check_value.chars().count() as u64;
1310            if len < min_length.value {
1311                return Err(FacetError::MinLengthViolation {
1312                    actual: len,
1313                    min: min_length.value,
1314                });
1315            }
1316        }
1317
1318        // Check maxLength facet
1319        if let Some(ref max_length) = self.max_length {
1320            let len = check_value.chars().count() as u64;
1321            if len > max_length.value {
1322                return Err(FacetError::MaxLengthViolation {
1323                    actual: len,
1324                    max: max_length.value,
1325                });
1326            }
1327        }
1328
1329        // Check all pattern steps (each step's alternatives are OR'd; all
1330        // steps must match).
1331        self.check_patterns(check_value)?;
1332
1333        // Check enumeration
1334        if let Some(ref enumeration) = self.enumeration {
1335            if !enumeration.values.contains(check_value) {
1336                return Err(FacetError::enumeration(check_value));
1337            }
1338        }
1339
1340        Ok(())
1341    }
1342
1343    /// Validate only pattern and enumeration facets on a string value.
1344    /// Used for list types where length facets are checked separately as item count.
1345    pub fn validate_string_patterns_enums(&self, value: &str) -> FacetResult<()> {
1346        let normalized = match &self.whitespace {
1347            Some(ws) => normalize_whitespace(value, ws.value),
1348            None => value.to_string(),
1349        };
1350        let check_value = &normalized;
1351
1352        self.check_patterns(check_value)?;
1353
1354        if let Some(ref enumeration) = self.enumeration {
1355            if !enumeration.values.contains(check_value) {
1356                return Err(FacetError::enumeration(check_value));
1357            }
1358        }
1359
1360        Ok(())
1361    }
1362
1363    /// Validate only pattern facets (no enumeration, no length).
1364    /// Used when enumeration must be checked in value space rather than lexically.
1365    pub fn validate_patterns_only(&self, value: &str) -> FacetResult<()> {
1366        let normalized = match &self.whitespace {
1367            Some(ws) => normalize_whitespace(value, ws.value),
1368            None => value.to_string(),
1369        };
1370        self.check_patterns(&normalized)
1371    }
1372
1373    /// Validate enumeration in value space using a caller-supplied match predicate.
1374    /// `is_match(enum_str)` returns true if the instance value equals the given
1375    /// enumeration lexical value. `display` is used in the error message on failure.
1376    pub fn validate_enum_value_space(
1377        &self,
1378        is_match: impl Fn(&str) -> bool,
1379        display: &str,
1380    ) -> FacetResult<()> {
1381        if let Some(ref enumeration) = self.enumeration {
1382            if !enumeration.values.iter().any(|s| is_match(s)) {
1383                return Err(FacetError::enumeration(display));
1384            }
1385        }
1386        Ok(())
1387    }
1388
1389    /// Validate a decimal value against numeric facets
1390    pub fn validate_decimal(&self, value: &rust_decimal::Decimal) -> FacetResult<()> {
1391        // Check totalDigits
1392        if let Some(ref td) = self.total_digits {
1393            let total = count_total_digits(value);
1394            if total > td.value {
1395                return Err(FacetError::TotalDigitsViolation {
1396                    actual: total,
1397                    max: td.value,
1398                });
1399            }
1400        }
1401
1402        // Check fractionDigits
1403        if let Some(ref fd) = self.fraction_digits {
1404            let frac = count_fraction_digits(value);
1405            if frac > fd.value {
1406                return Err(FacetError::FractionDigitsViolation {
1407                    actual: frac,
1408                    max: fd.value,
1409                });
1410            }
1411        }
1412
1413        // Check numeric bounds
1414        if let Some(ref min) = self.min_inclusive {
1415            if let Ok(bound) = rust_decimal::Decimal::from_str_exact(&min.value) {
1416                if *value < bound {
1417                    return Err(FacetError::MinInclusiveViolation {
1418                        value: value.to_string(),
1419                        min: min.value.clone(),
1420                    });
1421                }
1422            }
1423        }
1424
1425        if let Some(ref max) = self.max_inclusive {
1426            if let Ok(bound) = rust_decimal::Decimal::from_str_exact(&max.value) {
1427                if *value > bound {
1428                    return Err(FacetError::MaxInclusiveViolation {
1429                        value: value.to_string(),
1430                        max: max.value.clone(),
1431                    });
1432                }
1433            }
1434        }
1435
1436        if let Some(ref min) = self.min_exclusive {
1437            if let Ok(bound) = rust_decimal::Decimal::from_str_exact(&min.value) {
1438                if *value <= bound {
1439                    return Err(FacetError::MinExclusiveViolation {
1440                        value: value.to_string(),
1441                        min: min.value.clone(),
1442                    });
1443                }
1444            }
1445        }
1446
1447        if let Some(ref max) = self.max_exclusive {
1448            if let Ok(bound) = rust_decimal::Decimal::from_str_exact(&max.value) {
1449                if *value >= bound {
1450                    return Err(FacetError::MaxExclusiveViolation {
1451                        value: value.to_string(),
1452                        max: max.value.clone(),
1453                    });
1454                }
1455            }
1456        }
1457
1458        Ok(())
1459    }
1460
1461    /// Validate a float value against numeric bounds facets
1462    pub fn validate_float(&self, value: f32) -> FacetResult<()> {
1463        // NaN doesn't compare normally, so skip bounds checking for NaN
1464        if value.is_nan() {
1465            return Ok(());
1466        }
1467
1468        // Check numeric bounds
1469        if let Some(ref min) = self.min_inclusive {
1470            if let Ok(bound) = min.value.parse::<f32>() {
1471                if !bound.is_nan() && value < bound {
1472                    return Err(FacetError::MinInclusiveViolation {
1473                        value: format_float_for_error(value),
1474                        min: min.value.clone(),
1475                    });
1476                }
1477            }
1478        }
1479
1480        if let Some(ref max) = self.max_inclusive {
1481            if let Ok(bound) = max.value.parse::<f32>() {
1482                if !bound.is_nan() && value > bound {
1483                    return Err(FacetError::MaxInclusiveViolation {
1484                        value: format_float_for_error(value),
1485                        max: max.value.clone(),
1486                    });
1487                }
1488            }
1489        }
1490
1491        if let Some(ref min) = self.min_exclusive {
1492            if let Ok(bound) = min.value.parse::<f32>() {
1493                if !bound.is_nan() && value <= bound {
1494                    return Err(FacetError::MinExclusiveViolation {
1495                        value: format_float_for_error(value),
1496                        min: min.value.clone(),
1497                    });
1498                }
1499            }
1500        }
1501
1502        if let Some(ref max) = self.max_exclusive {
1503            if let Ok(bound) = max.value.parse::<f32>() {
1504                if !bound.is_nan() && value >= bound {
1505                    return Err(FacetError::MaxExclusiveViolation {
1506                        value: format_float_for_error(value),
1507                        max: max.value.clone(),
1508                    });
1509                }
1510            }
1511        }
1512
1513        Ok(())
1514    }
1515
1516    /// Validate a double value against numeric bounds facets
1517    pub fn validate_double(&self, value: f64) -> FacetResult<()> {
1518        // NaN doesn't compare normally, so skip bounds checking for NaN
1519        if value.is_nan() {
1520            return Ok(());
1521        }
1522
1523        // Check numeric bounds
1524        if let Some(ref min) = self.min_inclusive {
1525            if let Ok(bound) = min.value.parse::<f64>() {
1526                if !bound.is_nan() && value < bound {
1527                    return Err(FacetError::MinInclusiveViolation {
1528                        value: format_double_for_error(value),
1529                        min: min.value.clone(),
1530                    });
1531                }
1532            }
1533        }
1534
1535        if let Some(ref max) = self.max_inclusive {
1536            if let Ok(bound) = max.value.parse::<f64>() {
1537                if !bound.is_nan() && value > bound {
1538                    return Err(FacetError::MaxInclusiveViolation {
1539                        value: format_double_for_error(value),
1540                        max: max.value.clone(),
1541                    });
1542                }
1543            }
1544        }
1545
1546        if let Some(ref min) = self.min_exclusive {
1547            if let Ok(bound) = min.value.parse::<f64>() {
1548                if !bound.is_nan() && value <= bound {
1549                    return Err(FacetError::MinExclusiveViolation {
1550                        value: format_double_for_error(value),
1551                        min: min.value.clone(),
1552                    });
1553                }
1554            }
1555        }
1556
1557        if let Some(ref max) = self.max_exclusive {
1558            if let Ok(bound) = max.value.parse::<f64>() {
1559                if !bound.is_nan() && value >= bound {
1560                    return Err(FacetError::MaxExclusiveViolation {
1561                        value: format_double_for_error(value),
1562                        max: max.value.clone(),
1563                    });
1564                }
1565            }
1566        }
1567
1568        Ok(())
1569    }
1570
1571    /// Validate explicitTimezone constraint (XSD 1.1)
1572    ///
1573    /// # Arguments
1574    /// * `has_timezone` - Whether the value has a timezone specified
1575    pub fn validate_explicit_timezone(&self, has_timezone: bool) -> FacetResult<()> {
1576        if let Some(ref etz) = self.explicit_timezone {
1577            match etz.value {
1578                ExplicitTimezone::Required if !has_timezone => {
1579                    return Err(FacetError::ExplicitTimezoneViolation {
1580                        message: "timezone is required but not present".to_string(),
1581                    });
1582                }
1583                ExplicitTimezone::Prohibited if has_timezone => {
1584                    return Err(FacetError::ExplicitTimezoneViolation {
1585                        message: "timezone is prohibited but present".to_string(),
1586                    });
1587                }
1588                ExplicitTimezone::Optional
1589                | ExplicitTimezone::Required
1590                | ExplicitTimezone::Prohibited => {
1591                    // Valid
1592                }
1593            }
1594        }
1595        Ok(())
1596    }
1597
1598    /// Validate a binary value (hex or base64) against length facets
1599    pub fn validate_binary_length(&self, byte_count: u64) -> FacetResult<()> {
1600        // For binary types, length is measured in octets
1601        if let Some(ref length) = self.length {
1602            if byte_count != length.value {
1603                return Err(FacetError::length(format!(
1604                    "binary length {} does not equal required length {}",
1605                    byte_count, length.value
1606                )));
1607            }
1608        }
1609
1610        if let Some(ref min_length) = self.min_length {
1611            if byte_count < min_length.value {
1612                return Err(FacetError::MinLengthViolation {
1613                    actual: byte_count,
1614                    min: min_length.value,
1615                });
1616            }
1617        }
1618
1619        if let Some(ref max_length) = self.max_length {
1620            if byte_count > max_length.value {
1621                return Err(FacetError::MaxLengthViolation {
1622                    actual: byte_count,
1623                    max: max_length.value,
1624                });
1625            }
1626        }
1627
1628        Ok(())
1629    }
1630
1631    /// Validate a list value against length facets (item count)
1632    pub fn validate_list_length(&self, item_count: u64) -> FacetResult<()> {
1633        // For list types, length is measured in number of items
1634        if let Some(ref length) = self.length {
1635            if item_count != length.value {
1636                return Err(FacetError::length(format!(
1637                    "list length {} does not equal required length {}",
1638                    item_count, length.value
1639                )));
1640            }
1641        }
1642
1643        if let Some(ref min_length) = self.min_length {
1644            if item_count < min_length.value {
1645                return Err(FacetError::MinLengthViolation {
1646                    actual: item_count,
1647                    min: min_length.value,
1648                });
1649            }
1650        }
1651
1652        if let Some(ref max_length) = self.max_length {
1653            if item_count > max_length.value {
1654                return Err(FacetError::MaxLengthViolation {
1655                    actual: item_count,
1656                    max: max_length.value,
1657                });
1658            }
1659        }
1660
1661        Ok(())
1662    }
1663}
1664
1665/// Check if derived whitespace mode is more restrictive than base
1666fn is_whitespace_more_restrictive(derived: WhitespaceMode, base: WhitespaceMode) -> bool {
1667    use WhitespaceMode::*;
1668    match (base, derived) {
1669        // Same is always OK
1670        (Preserve, Preserve) | (Replace, Replace) | (Collapse, Collapse) => true,
1671        // preserve -> replace -> collapse is more restrictive
1672        (Preserve, Replace) | (Preserve, Collapse) | (Replace, Collapse) => true,
1673        // Going the other way is less restrictive
1674        _ => false,
1675    }
1676}
1677
/// Compare two strings as decimal/integer values.
///
/// Plain decimal literals (optional sign, digits, optional fraction) are
/// compared exactly, digit by digit, so values that differ only beyond `f64`
/// precision (for example integer bounds above 2^53) still order correctly.
/// Any other spelling accepted by `f64::from_str` (exponent notation, `inf`,
/// ...) falls back to a floating-point comparison.
///
/// Surrounding whitespace is ignored. Returns `None` if either string cannot
/// be parsed as a number, or if the floating-point comparison is undefined
/// (NaN).
fn compare_decimal_strings(a: &str, b: &str) -> Option<std::cmp::Ordering> {
    use std::cmp::Ordering;

    /// Split a plain decimal literal into (is_negative, integer digits
    /// without leading zeros, fraction digits without trailing zeros).
    /// Returns `None` for anything that is not a plain decimal literal.
    fn split_plain_decimal(text: &str) -> Option<(bool, &str, &str)> {
        let (negative, unsigned) = match text.as_bytes().first()? {
            b'-' => (true, &text[1..]),
            b'+' => (false, &text[1..]),
            _ => (false, text),
        };
        let (int_digits, frac_digits) = unsigned.split_once('.').unwrap_or((unsigned, ""));
        let all_digits = |digits: &str| digits.bytes().all(|b| b.is_ascii_digit());
        if (int_digits.is_empty() && frac_digits.is_empty())
            || !all_digits(int_digits)
            || !all_digits(frac_digits)
        {
            return None;
        }

        let int_digits = int_digits.trim_start_matches('0');
        let frac_digits = frac_digits.trim_end_matches('0');
        // Treat "-0" as plain zero so it compares equal to "0".
        let is_zero = int_digits.is_empty() && frac_digits.is_empty();
        Some((negative && !is_zero, int_digits, frac_digits))
    }

    let (a, b) = (a.trim(), b.trim());

    if let (Some((a_neg, a_int, a_frac)), Some((b_neg, b_int, b_frac))) =
        (split_plain_decimal(a), split_plain_decimal(b))
    {
        // With leading zeros stripped, a longer integer part is larger; equal
        // lengths compare lexicographically. Fractions with trailing zeros
        // stripped also compare lexicographically.
        let magnitude = a_int
            .len()
            .cmp(&b_int.len())
            .then_with(|| a_int.cmp(b_int))
            .then_with(|| a_frac.cmp(b_frac));

        return Some(match (a_neg, b_neg) {
            (false, false) => magnitude,
            (true, true) => magnitude.reverse(),
            (true, false) => Ordering::Less,
            (false, true) => Ordering::Greater,
        });
    }

    // Not both plain decimals: keep the general floating-point comparison.
    let a_val: f64 = a.parse().ok()?;
    let b_val: f64 = b.parse().ok()?;
    a_val.partial_cmp(&b_val)
}
1686
1687/// Apply whitespace normalization to a string
1688pub fn normalize_whitespace(s: &str, mode: WhitespaceMode) -> String {
1689    match mode {
1690        WhitespaceMode::Preserve => s.to_string(),
1691        WhitespaceMode::Replace => {
1692            // Replace tab, CR, LF with space
1693            s.chars()
1694                .map(|c| match c {
1695                    '\t' | '\r' | '\n' => ' ',
1696                    _ => c,
1697                })
1698                .collect()
1699        }
1700        WhitespaceMode::Collapse => {
1701            // Replace, then collapse consecutive spaces, then trim
1702            let replaced: String = s
1703                .chars()
1704                .map(|c| match c {
1705                    '\t' | '\r' | '\n' => ' ',
1706                    _ => c,
1707                })
1708                .collect();
1709
1710            let mut result = String::with_capacity(replaced.len());
1711            let mut prev_space = true; // Start true to trim leading spaces
1712
1713            for c in replaced.chars() {
1714                if c == ' ' {
1715                    if !prev_space {
1716                        result.push(' ');
1717                        prev_space = true;
1718                    }
1719                } else {
1720                    result.push(c);
1721                    prev_space = false;
1722                }
1723            }
1724
1725            // Trim trailing space
1726            if result.ends_with(' ') {
1727                result.pop();
1728            }
1729
1730            result
1731        }
1732    }
1733}
1734
1735/// Count total significant digits in a decimal value.
1736///
1737/// Per Datatypes Part 2 §4.3.11.4 the `totalDigits` constraint counts the
1738/// digits of the unscaled integer mantissa `j` in the canonical
1739/// representation `i = j × 10^-k` (with leading zeros excluded). For
1740/// example `0.12345 = 12345 × 10^-5` has totalDigits = 5 — the leading `0`
1741/// before the decimal point in the lexical form must not be counted.
1742fn count_total_digits(value: &rust_decimal::Decimal) -> u32 {
1743    let normalized = value.abs().normalize();
1744    let mut m = normalized.mantissa().unsigned_abs();
1745    if m == 0 {
1746        return 1;
1747    }
1748    let mut count = 0u32;
1749    while m > 0 {
1750        count += 1;
1751        m /= 10;
1752    }
1753    count
1754}
1755
1756/// Count fraction digits in a decimal value
1757fn count_fraction_digits(value: &rust_decimal::Decimal) -> u32 {
1758    let s = value.normalize().to_string();
1759    match s.find('.') {
1760        Some(pos) => (s.len() - pos - 1) as u32,
1761        None => 0,
1762    }
1763}
1764
/// Render an `f32` for inclusion in an error message.
///
/// Special values use the XSD spellings `NaN`, `INF` and `-INF`; finite
/// values use Rust's default `Display` output.
fn format_float_for_error(v: f32) -> String {
    match v {
        x if x.is_nan() => "NaN".to_owned(),
        x if x == f32::INFINITY => "INF".to_owned(),
        x if x == f32::NEG_INFINITY => "-INF".to_owned(),
        finite => finite.to_string(),
    }
}
1779
/// Render an `f64` for inclusion in an error message.
///
/// Special values use the XSD spellings `NaN`, `INF` and `-INF`; finite
/// values use Rust's default `Display` output.
fn format_double_for_error(v: f64) -> String {
    match v {
        x if x.is_nan() => "NaN".to_owned(),
        x if x == f64::INFINITY => "INF".to_owned(),
        x if x == f64::NEG_INFINITY => "-INF".to_owned(),
        finite => finite.to_string(),
    }
}
1794
/// Outcome of asking whether a facet may be used with a built-in type.
///
/// Returned by the applicability lookups in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FacetApplicability {
    /// The facet cannot be used with the type.
    NotApplicable,
    /// The facet may be used with the type.
    Applicable,
    /// The facet is required for the type (e.g. whitespace for string).
    Required,
}
1807
/// The constraining facets this module knows about, used as keys when
/// checking which facets apply to which types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FacetKind {
    /// `length`
    Length,
    /// `minLength`
    MinLength,
    /// `maxLength`
    MaxLength,
    /// `pattern`
    Pattern,
    /// `enumeration`
    Enumeration,
    /// `whiteSpace`
    Whitespace,
    /// `minInclusive`
    MinInclusive,
    /// `maxInclusive`
    MaxInclusive,
    /// `minExclusive`
    MinExclusive,
    /// `maxExclusive`
    MaxExclusive,
    /// `totalDigits`
    TotalDigits,
    /// `fractionDigits`
    FractionDigits,
    /// `explicitTimezone` (XSD 1.1)
    ExplicitTimezone,
    /// `assertion` (XSD 1.1)
    Assertion,
}

impl FacetKind {
    /// Every facet kind, in declaration order.
    const ALL: [FacetKind; 14] = [
        FacetKind::Length,
        FacetKind::MinLength,
        FacetKind::MaxLength,
        FacetKind::Pattern,
        FacetKind::Enumeration,
        FacetKind::Whitespace,
        FacetKind::MinInclusive,
        FacetKind::MaxInclusive,
        FacetKind::MinExclusive,
        FacetKind::MaxExclusive,
        FacetKind::TotalDigits,
        FacetKind::FractionDigits,
        FacetKind::ExplicitTimezone,
        FacetKind::Assertion,
    ];

    /// Look up a facet kind by its XSD element name (case-sensitive).
    ///
    /// Returns `None` for names that are not recognized.
    pub fn from_name(name: &str) -> Option<Self> {
        Self::ALL.iter().copied().find(|kind| kind.name() == name)
    }

    /// The XSD element name of this facet kind.
    pub fn name(&self) -> &'static str {
        match *self {
            FacetKind::Length => "length",
            FacetKind::MinLength => "minLength",
            FacetKind::MaxLength => "maxLength",
            FacetKind::Pattern => "pattern",
            FacetKind::Enumeration => "enumeration",
            FacetKind::Whitespace => "whiteSpace",
            FacetKind::MinInclusive => "minInclusive",
            FacetKind::MaxInclusive => "maxInclusive",
            FacetKind::MinExclusive => "minExclusive",
            FacetKind::MaxExclusive => "maxExclusive",
            FacetKind::TotalDigits => "totalDigits",
            FacetKind::FractionDigits => "fractionDigits",
            FacetKind::ExplicitTimezone => "explicitTimezone",
            FacetKind::Assertion => "assertion",
        }
    }
}
1871
1872/// Check if a facet is applicable to a type (using XmlTypeCode)
1873pub fn facet_applicable_for_type(facet: FacetKind, type_code: XmlTypeCode) -> FacetApplicability {
1874    use FacetApplicability::*;
1875    use FacetKind::*;
1876    use XmlTypeCode::*;
1877
1878    match facet {
1879        // Length facets apply to string, binary, list, and URI types
1880        Length | MinLength | MaxLength => match type_code {
1881            String | NormalizedString | Token | Language | NmToken | Name | NCName | Id | IdRef
1882            | Entity | HexBinary | Base64Binary | AnyUri | QName | Notation | NmTokens | IdRefs
1883            | Entities => Applicable,
1884            _ => NotApplicable,
1885        },
1886
1887        // Pattern and enumeration apply to all atomic types
1888        Pattern | Enumeration => {
1889            if type_code.is_atomic() || type_code == AnySimpleType || type_code == AnyAtomicType {
1890                Applicable
1891            } else {
1892                NotApplicable
1893            }
1894        }
1895
1896        // Whitespace is required for string, applicable to all string-derived types
1897        Whitespace => match type_code {
1898            String => Required,
1899            NormalizedString | Token | Language | NmToken | Name | NCName | Id | IdRef | Entity => {
1900                Applicable
1901            }
1902            // All other atomic types can have whitespace
1903            _ if type_code.is_atomic() => Applicable,
1904            _ => NotApplicable,
1905        },
1906
1907        // Bound facets apply to ordered types (numeric, date/time)
1908        MinInclusive | MaxInclusive | MinExclusive | MaxExclusive => match type_code {
1909            // Decimal hierarchy
1910            Decimal | Integer | NonPositiveInteger | NegativeInteger | NonNegativeInteger
1911            | PositiveInteger | Long | Int | Short | Byte | UnsignedLong | UnsignedInt
1912            | UnsignedShort | UnsignedByte => Applicable,
1913            // Float/Double
1914            Float | Double => Applicable,
1915            // Date/time types (all have total ordering)
1916            Duration | DateTime | Time | Date | GYearMonth | GYear | GMonthDay | GDay | GMonth
1917            | YearMonthDuration | DayTimeDuration | DateTimeStamp => Applicable,
1918            _ => NotApplicable,
1919        },
1920
1921        // Digit facets apply only to decimal types
1922        TotalDigits => match type_code {
1923            Decimal | Integer | NonPositiveInteger | NegativeInteger | NonNegativeInteger
1924            | PositiveInteger | Long | Int | Short | Byte | UnsignedLong | UnsignedInt
1925            | UnsignedShort | UnsignedByte => Applicable,
1926            _ => NotApplicable,
1927        },
1928
1929        FractionDigits => match type_code {
1930            Decimal => Applicable,
1931            // Integer types have fractionDigits implicitly 0
1932            Integer | NonPositiveInteger | NegativeInteger | NonNegativeInteger
1933            | PositiveInteger | Long | Int | Short | Byte | UnsignedLong | UnsignedInt
1934            | UnsignedShort | UnsignedByte => Applicable,
1935            _ => NotApplicable,
1936        },
1937
1938        // XSD 1.1: explicitTimezone applies to date/time types with optional timezone
1939        ExplicitTimezone => match type_code {
1940            DateTime | Time | Date | GYearMonth | GYear | GMonthDay | GDay | GMonth
1941            | DateTimeStamp => Applicable,
1942            _ => NotApplicable,
1943        },
1944
1945        // XSD 1.1: assertion applies to all types
1946        Assertion => Applicable,
1947    }
1948}
1949
1950/// Check if a facet is applicable to a built-in type (by name)
1951///
1952/// This is a convenience wrapper around `facet_applicable_for_type` that
1953/// takes string names for compatibility.
1954pub fn facet_applicable(type_name: &str, facet_name: &str) -> FacetApplicability {
1955    let facet = match FacetKind::from_name(facet_name) {
1956        Some(f) => f,
1957        None => return FacetApplicability::NotApplicable,
1958    };
1959
1960    let type_code = match XmlTypeCode::from_local_name(type_name) {
1961        Some(tc) => tc,
1962        None => return FacetApplicability::NotApplicable,
1963    };
1964
1965    facet_applicable_for_type(facet, type_code)
1966}
1967
1968#[cfg(test)]
1969mod tests {
1970    use super::*;
1971    use rust_decimal::Decimal;
1972    use std::str::FromStr;
1973
1974    // =========================================================================
1975    // Basic FacetSet tests
1976    // =========================================================================
1977
/// A freshly constructed facet set reports itself as empty.
#[test]
fn test_facet_set_empty() {
    assert!(FacetSet::new().is_empty());
}
1983
/// Setting `length` stores the value and makes the set non-empty.
#[test]
fn test_facet_set_length() {
    let mut set = FacetSet::new();
    set.set_length(10, FacetFixed::Default, None);

    let stored = set.length.as_ref().map(|facet| facet.value);
    assert!(!set.is_empty());
    assert_eq!(stored, Some(10));
}
1992
/// Patterns added to one facet set land in the same derivation step.
#[test]
fn test_facet_set_patterns() {
    let mut set = FacetSet::new();
    for regex in ["[a-z]+", "[0-9]+"] {
        set.add_pattern(regex.to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
            .unwrap();
    }

    // One step (the patterns inside it are OR'd), holding both patterns.
    let steps = &set.patterns;
    assert_eq!(steps.len(), 1);
    assert_eq!(steps[0].len(), 2);
}
2007
/// Each added enumeration value is recorded in the enumeration facet.
#[test]
fn test_facet_set_enumeration() {
    let mut set = FacetSet::new();
    for colour in ["red", "green", "blue"] {
        set.add_enumeration(colour.to_string(), None);
    }

    let values = &set.enumeration.as_ref().unwrap().values;
    assert_eq!(values.len(), 3);
    assert!(values.contains("red"));
}
2019
/// `inherit_from` fills in unset facets but keeps the ones already set.
#[test]
fn test_facet_inheritance() {
    let mut parent = FacetSet::new();
    parent.set_min_length(5, FacetFixed::Fixed, None);
    parent.set_max_length(100, FacetFixed::Default, None);
    parent
        .add_pattern("[a-z]+".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
        .unwrap();

    // The child sets its own maxLength before inheriting.
    let mut child = FacetSet::new();
    child.set_max_length(50, FacetFixed::Default, None);
    child.inherit_from(&parent);

    let min_length = child.min_length.as_ref().map(|facet| facet.value);
    let max_length = child.max_length.as_ref().map(|facet| facet.value);

    // minLength comes from the parent.
    assert_eq!(min_length, Some(5));
    // maxLength keeps the child's own value.
    assert_eq!(max_length, Some(50));
    // The parent's pattern step arrives as its own step.
    assert_eq!(child.patterns.len(), 1);
    assert_eq!(child.patterns[0].len(), 1);
}
2041
2042    // =========================================================================
2043    // Facet applicability tests
2044    // =========================================================================
2045
/// Name-based applicability lookups for a handful of type/facet pairs.
#[test]
fn test_facet_applicability() {
    use FacetApplicability::*;

    // (type name, facet name, expected applicability)
    let cases = [
        // Length facets apply to string types
        ("string", "length", Applicable),
        ("decimal", "length", NotApplicable),
        // Numeric facets apply to numeric types
        ("decimal", "minInclusive", Applicable),
        ("string", "minInclusive", NotApplicable),
        // Pattern and enumeration apply to all
        ("string", "pattern", Applicable),
        ("decimal", "pattern", Applicable),
        // Whitespace is required for string
        ("string", "whiteSpace", Required),
    ];

    for (type_name, facet_name, expected) in cases {
        assert_eq!(
            facet_applicable(type_name, facet_name),
            expected,
            "{} on {}",
            facet_name,
            type_name
        );
    }
}
2065
/// Type-code-based applicability lookups for length, digit and timezone facets.
#[test]
fn test_facet_applicability_with_type_code() {
    use FacetApplicability::*;
    use FacetKind::*;
    use XmlTypeCode::*;

    // (facet, type code, expected applicability)
    let cases = [
        // Length facets
        (Length, String, Applicable),
        (Length, HexBinary, Applicable),
        (Length, Decimal, NotApplicable),
        // Digit facets
        (TotalDigits, Decimal, Applicable),
        (TotalDigits, Integer, Applicable),
        (TotalDigits, Float, NotApplicable),
        // Date/time facets
        (ExplicitTimezone, DateTime, Applicable),
        (ExplicitTimezone, String, NotApplicable),
    ];

    for (facet, type_code, expected) in cases {
        assert_eq!(facet_applicable_for_type(facet, type_code), expected);
    }
}
2092
2093    // =========================================================================
2094    // Pattern tests
2095    // =========================================================================
2096
/// A lowercase-letters pattern accepts only lowercase words.
#[test]
fn test_pattern_matching() {
    let lowercase =
        PatternFacet::new("[a-z]+".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
            .unwrap();

    assert!(lowercase.matches("hello"));
    for rejected in ["HELLO", "hello123"] {
        assert!(!lowercase.matches(rejected), "{} should not match", rejected);
    }
}
2106
/// XSD patterns are implicitly anchored: they must match the whole value.
#[test]
fn test_pattern_xsd_anchoring() {
    let literal =
        PatternFacet::new("abc".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
            .unwrap();

    assert!(literal.matches("abc"));
    for rejected in ["xabc", "abcx"] {
        assert!(!literal.matches(rejected), "{} should not match", rejected);
    }
}
2117
/// The XSD escapes `\i` (initial name char) and `\c` (name char) are supported.
#[test]
fn test_pattern_xsd_name_chars() {
    let name_like =
        PatternFacet::new(r"\i\c*".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
            .unwrap();

    for accepted in ["foo", "_bar", "x123"] {
        assert!(name_like.matches(accepted), "{} should match", accepted);
    }
    assert!(!name_like.matches("123"));
}
2129
/// A malformed pattern is rejected when the facet is constructed.
#[test]
fn test_invalid_pattern() {
    let outcome =
        PatternFacet::new("[invalid".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict);
    assert!(outcome.is_err());
}
2136
2137    // =========================================================================
2138    // Whitespace normalization tests
2139    // =========================================================================
2140
/// `Preserve` returns the input untouched.
#[test]
fn test_whitespace_preserve() {
    let input = "  hello\t\nworld  ";
    assert_eq!(
        normalize_whitespace(input, WhitespaceMode::Preserve),
        "  hello\t\nworld  "
    );
}
2146
/// `Replace` turns tab and newline into spaces without collapsing them.
#[test]
fn test_whitespace_replace() {
    let input = "  hello\t\nworld  ";
    assert_eq!(
        normalize_whitespace(input, WhitespaceMode::Replace),
        "  hello  world  "
    );
}
2152
/// `Collapse` trims the ends and squeezes inner whitespace to one space.
#[test]
fn test_whitespace_collapse() {
    let input = "  hello\t\nworld  ";
    assert_eq!(
        normalize_whitespace(input, WhitespaceMode::Collapse),
        "hello world"
    );
}
2158
/// `Collapse` reduces a run of plain spaces to a single space.
#[test]
fn test_whitespace_collapse_multiple_spaces() {
    assert_eq!(
        normalize_whitespace("a     b", WhitespaceMode::Collapse),
        "a b"
    );
}
2164
2165    // =========================================================================
2166    // String validation tests
2167    // =========================================================================
2168
/// With `length = 5`, only five-character strings validate.
#[test]
fn test_validate_string_length() {
    let mut facets = FacetSet::new();
    facets.set_length(5, FacetFixed::Default, None);

    for (input, expect_ok) in [("hello", true), ("hi", false), ("toolong", false)] {
        assert_eq!(facets.validate_string(input).is_ok(), expect_ok, "{}", input);
    }
}
2178
/// With `minLength = 3` and `maxLength = 10`, strings outside that range fail.
#[test]
fn test_validate_string_min_max_length() {
    let mut facets = FacetSet::new();
    facets.set_min_length(3, FacetFixed::Default, None);
    facets.set_max_length(10, FacetFixed::Default, None);

    let cases = [
        ("hello", true),
        ("hi", false),
        ("this is way too long", false),
    ];
    for (input, expect_ok) in cases {
        assert_eq!(facets.validate_string(input).is_ok(), expect_ok, "{}", input);
    }
}
2189
/// String validation applies the pattern facet.
#[test]
fn test_validate_string_pattern() {
    let mut facets = FacetSet::new();
    facets
        .add_pattern("[a-z]+".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
        .unwrap();

    let lowercase = facets.validate_string("hello");
    let uppercase = facets.validate_string("HELLO");
    assert!(lowercase.is_ok());
    assert!(uppercase.is_err());
}
2200
/// String validation accepts only the enumerated values.
#[test]
fn test_validate_string_enumeration() {
    let mut facets = FacetSet::new();
    for colour in ["red", "green", "blue"] {
        facets.add_enumeration(colour.to_string(), None);
    }

    assert!(facets.validate_string("red").is_ok());
    assert!(facets.validate_string("yellow").is_err());
}
2211
2212    // =========================================================================
2213    // Decimal validation tests
2214    // =========================================================================
2215
/// With `totalDigits = 5`, a six-digit decimal is rejected.
#[test]
fn test_validate_decimal_total_digits() {
    let mut facets = FacetSet::new();
    facets.set_total_digits(5, FacetFixed::Default, None);

    let five_digits = Decimal::from_str("12345").unwrap();
    let six_digits = Decimal::from_str("123456").unwrap();

    assert!(facets.validate_decimal(&five_digits).is_ok());
    assert!(facets.validate_decimal(&six_digits).is_err());
}
2227
/// With `fractionDigits = 2`, a value with three fraction digits is rejected.
#[test]
fn test_validate_decimal_fraction_digits() {
    let mut facets = FacetSet::new();
    facets.set_fraction_digits(2, FacetFixed::Default, None);

    let two_places = Decimal::from_str("123.45").unwrap();
    let three_places = Decimal::from_str("123.456").unwrap();

    assert!(facets.validate_decimal(&two_places).is_ok());
    assert!(facets.validate_decimal(&three_places).is_err());
}
2239
/// Inclusive bounds 0..=100 accept 50 and reject -1 and 101.
#[test]
fn test_validate_decimal_bounds() {
    let mut facets = FacetSet::new();
    facets.set_min_inclusive("0".to_string(), FacetFixed::Default, None);
    facets.set_max_inclusive("100".to_string(), FacetFixed::Default, None);

    let accepts = |literal: &str| {
        let value = Decimal::from_str(literal).unwrap();
        facets.validate_decimal(&value).is_ok()
    };

    assert!(accepts("50"));
    assert!(!accepts("-1"));
    assert!(!accepts("101"));
}
2255
/// Exclusive bounds (0, 100) reject both endpoints and accept 50.
#[test]
fn test_validate_decimal_exclusive_bounds() {
    let mut facets = FacetSet::new();
    facets.set_min_exclusive("0".to_string(), FacetFixed::Default, None);
    facets.set_max_exclusive("100".to_string(), FacetFixed::Default, None);

    let accepts = |literal: &str| {
        let value = Decimal::from_str(literal).unwrap();
        facets.validate_decimal(&value).is_ok()
    };

    assert!(!accepts("0")); // 0 is not > 0
    assert!(!accepts("100")); // 100 is not < 100
    assert!(accepts("50"));
}
2271
2272    // =========================================================================
2273    // Binary/List length validation tests
2274    // =========================================================================
2275
/// With `length = 4`, only a four-octet binary value validates.
#[test]
fn test_validate_binary_length() {
    let mut facets = FacetSet::new();
    facets.set_length(4, FacetFixed::Default, None);

    for (octets, expect_ok) in [(4u64, true), (3, false), (5, false)] {
        assert_eq!(
            facets.validate_binary_length(octets).is_ok(),
            expect_ok,
            "{} octets",
            octets
        );
    }
}
2285
/// With `minLength = 1` and `maxLength = 5`, item counts outside the range fail.
#[test]
fn test_validate_list_length() {
    let mut facets = FacetSet::new();
    facets.set_min_length(1, FacetFixed::Default, None);
    facets.set_max_length(5, FacetFixed::Default, None);

    for (items, expect_ok) in [(3u64, true), (0, false), (10, false)] {
        assert_eq!(
            facets.validate_list_length(items).is_ok(),
            expect_ok,
            "{} items",
            items
        );
    }
}
2296
2297    // =========================================================================
2298    // merge_with_base tests
2299    // =========================================================================
2300
/// Merging an empty derived set yields the base facets.
#[test]
fn test_merge_with_base_inherits_facets() {
    let mut base = FacetSet::new();
    base.set_min_length(5, FacetFixed::Default, None);
    base.set_max_length(100, FacetFixed::Default, None);

    let merged = FacetSet::new().merge_with_base(&base).unwrap();

    let min_length = merged.min_length.as_ref().map(|facet| facet.value);
    let max_length = merged.max_length.as_ref().map(|facet| facet.value);
    assert_eq!(min_length, Some(5));
    assert_eq!(max_length, Some(100));
}
2313
/// A derived set may tighten both length bounds; the tighter values win.
#[test]
fn test_merge_with_base_allows_more_restrictive() {
    let mut base = FacetSet::new();
    base.set_min_length(5, FacetFixed::Default, None);
    base.set_max_length(100, FacetFixed::Default, None);

    // Both derived bounds are more restrictive than the base.
    let mut derived = FacetSet::new();
    derived.set_min_length(10, FacetFixed::Default, None);
    derived.set_max_length(50, FacetFixed::Default, None);

    let merged = derived.merge_with_base(&base).unwrap();

    let min_length = merged.min_length.as_ref().map(|facet| facet.value);
    let max_length = merged.max_length.as_ref().map(|facet| facet.value);
    assert_eq!(min_length, Some(10));
    assert_eq!(max_length, Some(50));
}
2328
/// Lowering `minLength` below the base value is rejected.
#[test]
fn test_merge_with_base_rejects_less_restrictive_min_length() {
    let mut base = FacetSet::new();
    base.set_min_length(10, FacetFixed::Default, None);

    // 5 < 10, so the derived facet is less restrictive.
    let mut derived = FacetSet::new();
    derived.set_min_length(5, FacetFixed::Default, None);

    assert!(derived.merge_with_base(&base).is_err());
}
2340
/// Raising `maxLength` above the base value is rejected.
#[test]
fn test_merge_with_base_rejects_less_restrictive_max_length() {
    let mut base = FacetSet::new();
    base.set_max_length(50, FacetFixed::Default, None);

    // 100 > 50, so the derived facet is less restrictive.
    let mut derived = FacetSet::new();
    derived.set_max_length(100, FacetFixed::Default, None);

    assert!(derived.merge_with_base(&base).is_err());
}
2352
/// Restating a fixed base facet with the same value is allowed.
#[test]
fn test_merge_with_base_fixed_facet_same_value_ok() {
    let mut base = FacetSet::new();
    base.set_length(10, FacetFixed::Fixed, None);

    // Same value as the fixed base facet.
    let mut derived = FacetSet::new();
    derived.set_length(10, FacetFixed::Default, None);

    assert!(derived.merge_with_base(&base).is_ok());
}
2364
/// Changing the value of a fixed base facet reports a `FixedFacetViolation`
/// naming the offending facet.
#[test]
fn test_merge_with_base_fixed_facet_different_value_error() {
    let mut base = FacetSet::new();
    base.set_length(10, FacetFixed::Fixed, None);

    // Different value from the fixed base facet.
    let mut derived = FacetSet::new();
    derived.set_length(20, FacetFixed::Default, None);

    match derived.merge_with_base(&base) {
        Err(FacetError::FixedFacetViolation { facet_name, .. }) => {
            assert_eq!(facet_name, "length");
        }
        _ => panic!("Expected FixedFacetViolation error"),
    }
}
2381
/// Patterns from the base and the derived set are kept as separate steps.
#[test]
fn test_merge_with_base_patterns_cumulative() {
    let mut base = FacetSet::new();
    base.add_pattern("[a-z]+".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
        .unwrap();

    let mut derived = FacetSet::new();
    derived
        .add_pattern("[0-9]+".to_string(), None, XsdVersion::V1_1, RegexCompat::Strict)
        .unwrap();

    // One step from the derived set and one from the base, AND'd together.
    let step_count = derived.merge_with_base(&base).unwrap().patterns.len();
    assert_eq!(step_count, 2);
}
2398
/// A derived enumeration drawn entirely from the base's values merges
/// without error.
#[test]
fn test_merge_with_base_enumeration_subset() {
    let mut parent = FacetSet::new();
    for colour in ["red", "green", "blue"] {
        parent.add_enumeration(colour.to_string(), None);
    }

    // Every value below also appears in the parent's enumeration.
    let mut child = FacetSet::new();
    for colour in ["red", "blue"] {
        child.add_enumeration(colour.to_string(), None);
    }

    let merged = child.merge_with_base(&parent);
    assert!(merged.is_ok());
}
2413
/// A derived enumeration containing a value absent from the base is rejected.
#[test]
fn test_merge_with_base_enumeration_not_subset_error() {
    let mut parent = FacetSet::new();
    for colour in ["red", "green"] {
        parent.add_enumeration(colour.to_string(), None);
    }

    let mut child = FacetSet::new();
    // "yellow" was never declared by the parent.
    child.add_enumeration(String::from("yellow"), None);

    let result = child.merge_with_base(&parent);
    assert!(result.is_err());
}
2426
/// Moving from `Preserve` in the base to `Collapse` in the derived set is
/// accepted by the merge.
#[test]
fn test_merge_with_base_whitespace_more_restrictive() {
    let not_fixed = FacetFixed::Default;

    let mut parent = FacetSet::new();
    parent.set_whitespace(WhitespaceMode::Preserve, not_fixed, None);

    let mut child = FacetSet::new();
    child.set_whitespace(WhitespaceMode::Collapse, not_fixed, None);

    let result = child.merge_with_base(&parent);
    assert!(result.is_ok());
}
2438
/// Moving from `Collapse` in the base back to `Preserve` in the derived set
/// is rejected by the merge.
#[test]
fn test_merge_with_base_whitespace_less_restrictive_error() {
    let not_fixed = FacetFixed::Default;

    let mut parent = FacetSet::new();
    parent.set_whitespace(WhitespaceMode::Collapse, not_fixed, None);

    let mut child = FacetSet::new();
    child.set_whitespace(WhitespaceMode::Preserve, not_fixed, None);

    let result = child.merge_with_base(&parent);
    assert!(result.is_err());
}
2450
/// Smaller `totalDigits` and `fractionDigits` in the derived set than in the
/// base are accepted by the merge.
#[test]
fn test_merge_with_base_digit_facets() {
    let not_fixed = FacetFixed::Default;

    let mut parent = FacetSet::new();
    parent.set_total_digits(10, not_fixed, None);
    parent.set_fraction_digits(5, not_fixed, None);

    // Both limits are tightened relative to the parent (10 -> 5, 5 -> 2).
    let mut child = FacetSet::new();
    child.set_total_digits(5, not_fixed, None);
    child.set_fraction_digits(2, not_fixed, None);

    let result = child.merge_with_base(&parent);
    assert!(result.is_ok());
}
2464
/// A derived `totalDigits` larger than the base's is rejected by the merge.
#[test]
fn test_merge_with_base_digit_facets_less_restrictive_error() {
    let not_fixed = FacetFixed::Default;

    let mut parent = FacetSet::new();
    parent.set_total_digits(5, not_fixed, None);

    let mut child = FacetSet::new();
    // Loosens the parent's limit of 5.
    child.set_total_digits(10, not_fixed, None);

    let result = child.merge_with_base(&parent);
    assert!(result.is_err());
}
2476
2477    // =========================================================================
2478    // Consistency validation tests
2479    // =========================================================================
2480
/// A facet set whose `minLength` exceeds its `maxLength` fails to merge, even
/// against an empty base.
#[test]
fn test_consistency_min_greater_than_max_length() {
    let mut facets = FacetSet::new();
    facets.set_min_length(10, FacetFixed::Default, None);
    facets.set_max_length(5, FacetFixed::Default, None);

    // The base contributes nothing, so the failure comes from the set itself.
    let empty_base = FacetSet::new();
    let result = facets.merge_with_base(&empty_base);
    assert!(result.is_err());
}
2490
/// A facet set declaring both `minInclusive` and `minExclusive` fails to
/// merge, even against an empty base.
#[test]
fn test_consistency_both_inclusive_and_exclusive() {
    let mut facets = FacetSet::new();
    facets.set_min_inclusive(String::from("0"), FacetFixed::Default, None);
    facets.set_min_exclusive(String::from("0"), FacetFixed::Default, None);

    // The base contributes nothing, so the failure comes from the set itself.
    let empty_base = FacetSet::new();
    let result = facets.merge_with_base(&empty_base);
    assert!(result.is_err());
}
2500
/// A facet set whose `fractionDigits` exceeds its `totalDigits` fails to
/// merge, even against an empty base.
#[test]
fn test_consistency_fraction_greater_than_total() {
    let mut facets = FacetSet::new();
    facets.set_total_digits(3, FacetFixed::Default, None);
    facets.set_fraction_digits(5, FacetFixed::Default, None);

    // The base contributes nothing, so the failure comes from the set itself.
    let empty_base = FacetSet::new();
    let result = facets.merge_with_base(&empty_base);
    assert!(result.is_err());
}
2510
2511    // =========================================================================
2512    // FacetKind tests
2513    // =========================================================================
2514
/// `FacetKind::from_name` maps known facet names to their kind and yields
/// `None` for an unrecognised name.
#[test]
fn test_facet_kind_from_name() {
    // (input name, expected lookup result)
    let expectations = [
        ("length", Some(FacetKind::Length)),
        ("minLength", Some(FacetKind::MinLength)),
        ("pattern", Some(FacetKind::Pattern)),
        ("unknown", None),
    ];

    for (name, expected) in expectations {
        assert_eq!(FacetKind::from_name(name), expected);
    }
}
2525
/// For each kind listed below, feeding `name()` back into `from_name` yields
/// the same kind again.
#[test]
fn test_facet_kind_name_roundtrip() {
    let sampled_kinds = [
        FacetKind::Length,
        FacetKind::MinLength,
        FacetKind::MaxLength,
        FacetKind::Pattern,
        FacetKind::Enumeration,
        FacetKind::Whitespace,
        FacetKind::MinInclusive,
        FacetKind::MaxInclusive,
        FacetKind::MinExclusive,
        FacetKind::MaxExclusive,
        FacetKind::TotalDigits,
        FacetKind::FractionDigits,
        FacetKind::ExplicitTimezone,
        FacetKind::Assertion,
    ];

    for kind in sampled_kinds {
        let round_tripped = FacetKind::from_name(kind.name());
        assert_eq!(round_tripped, Some(kind));
    }
}
2550
2551    // =========================================================================
2552    // XSD pattern to Rust conversion tests (XSD 1.0 path only)
2553    // =========================================================================
2554
/// The converted form of a plain pattern starts with `^` and ends with `$`.
#[cfg(not(feature = "xsd11"))]
#[test]
fn test_xsd_pattern_anchoring() {
    let options = ConvertOptions::xsd();
    let rust = convert_xml_pattern("abc", options);

    assert!(rust.starts_with('^'));
    assert!(rust.ends_with('$'));
}
2562
/// The `\i` escape is expanded to the character class `[A-Za-z_:]`.
#[cfg(not(feature = "xsd11"))]
#[test]
fn test_xsd_pattern_initial_name_char() {
    let options = ConvertOptions::xsd();
    let rust = convert_xml_pattern(r"\i", options);

    assert!(rust.contains("[A-Za-z_:]"));
}
2569
/// The `\c` escape is expanded to the character class `[A-Za-z0-9._:\-]`.
#[cfg(not(feature = "xsd11"))]
#[test]
fn test_xsd_pattern_name_char() {
    let options = ConvertOptions::xsd();
    let rust = convert_xml_pattern(r"\c", options);

    // Note the backslash: the hyphen appears escaped inside the class.
    assert!(rust.contains(r"[A-Za-z0-9._:\-]"));
}
2577
/// The `\d` and `\s` escapes are still present in the converted pattern.
#[cfg(not(feature = "xsd11"))]
#[test]
fn test_xsd_pattern_standard_escapes() {
    let options = ConvertOptions::xsd();
    let rust = convert_xml_pattern(r"\d+\s*", options);

    assert!(rust.contains(r"\d"));
    assert!(rust.contains(r"\s"));
}
2585}