Skip to main content

xsd_schema/parser/
attrs.rs

//! Attribute parsing and validation
//!
//! This module handles parsing and validation of XSD element attributes.
4
5use quick_xml::events::attributes::Attribute;
6
7use crate::error::{SchemaError, SchemaResult};
8use crate::ids::NameId;
9use crate::namespace::{NameTable, NamespaceContext, XS_NAMESPACE};
10use crate::parser::location::SourceRef;
11use crate::schema::annotation::ForeignAttribute;
12
13/// Parsed attribute value
14#[derive(Debug, Clone)]
15pub struct ParsedAttribute {
16    /// Namespace (None for unqualified attributes)
17    pub namespace: Option<NameId>,
18    /// Local name
19    pub local_name: NameId,
20    /// Prefix (for QName reconstruction)
21    pub prefix: Option<NameId>,
22    /// Value as string
23    pub value: String,
24    /// Source location
25    pub source: Option<SourceRef>,
26}
27
28impl ParsedAttribute {
29    /// Check if this is a namespace declaration (xmlns or xmlns:prefix)
30    pub fn is_namespace_decl(&self, xmlns_prefix_id: NameId, xmlns_ns_id: NameId) -> bool {
31        // xmlns:foo or xmlns
32        self.prefix == Some(xmlns_prefix_id)
33            || self.local_name == xmlns_prefix_id
34            || self.namespace == Some(xmlns_ns_id)
35    }
36
37    /// Check if this is an XSD attribute
38    pub fn is_xsd_attribute(&self, xsd_ns_id: NameId) -> bool {
39        // XSD attributes are either unqualified or in the XSD namespace
40        self.namespace.is_none() || self.namespace == Some(xsd_ns_id)
41    }
42}
43
44/// Parse attributes from a quick-xml BytesStart
45pub fn parse_attributes<'a>(
46    attrs: impl Iterator<Item = Result<Attribute<'a>, quick_xml::events::attributes::AttrError>>,
47    ns_context: &mut NamespaceContext,
48    source: Option<SourceRef>,
49) -> SchemaResult<Vec<ParsedAttribute>> {
50    let mut result = Vec::new();
51
52    for attr_result in attrs {
53        let attr = attr_result.map_err(|e| SchemaError::XmlError {
54            message: format!("Attribute error: {}", e),
55            location: None,
56        })?;
57
58        let name = attr.key.as_ref();
59        // Use unescape_value which works without encoding feature
60        let value = attr
61            .unescape_value()
62            .map_err(|e| SchemaError::XmlError {
63                message: format!("Attribute value decode error: {}", e),
64                location: None,
65            })?
66            .into_owned();
67
68        // Split into prefix and local name
69        let (local_name_bytes, prefix_bytes) = crate::parser::reader::split_qname(name);
70
71        let name_table = ns_context.name_table_mut();
72        let local_name_str =
73            std::str::from_utf8(local_name_bytes).map_err(|e| SchemaError::XmlError {
74                message: format!("Invalid UTF-8 in attribute name: {}", e),
75                location: None,
76            })?;
77        let local_name = name_table.add(local_name_str);
78
79        let prefix = match prefix_bytes {
80            Some(p) => {
81                let prefix_str = std::str::from_utf8(p).map_err(|e| SchemaError::XmlError {
82                    message: format!("Invalid UTF-8 in prefix: {}", e),
83                    location: None,
84                })?;
85                Some(name_table.add(prefix_str))
86            }
87            None => None,
88        };
89
90        // Resolve namespace from prefix
91        let namespace = match prefix {
92            Some(prefix_id) => ns_context.lookup_namespace_by_id(prefix_id),
93            None => None, // Unqualified attributes have no namespace
94        };
95
96        result.push(ParsedAttribute {
97            namespace,
98            local_name,
99            prefix,
100            value,
101            source: source.clone(),
102        });
103    }
104
105    Ok(result)
106}
107
108/// Extract XSD attributes and foreign attributes from parsed attributes
109pub fn categorize_attributes(
110    attrs: Vec<ParsedAttribute>,
111    name_table: &NameTable,
112) -> (Vec<ParsedAttribute>, Vec<ForeignAttribute>) {
113    let xsd_ns = name_table.get(XS_NAMESPACE);
114
115    let mut xsd_attrs = Vec::new();
116    let mut foreign_attrs = Vec::new();
117
118    for attr in attrs {
119        // Skip namespace declarations
120        let xmlns_id = name_table.get("xmlns");
121        let xmlns_ns_id = name_table.get(crate::namespace::XMLNS_NAMESPACE);
122
123        if let (Some(xmlns), Some(xmlns_ns)) = (xmlns_id, xmlns_ns_id) {
124            if attr.is_namespace_decl(xmlns, xmlns_ns) {
125                continue;
126            }
127        }
128
129        // Categorize as XSD or foreign
130        match (attr.namespace, xsd_ns) {
131            (None, _) => {
132                // Unqualified attribute - could be XSD attribute
133                xsd_attrs.push(attr);
134            }
135            (Some(ns), Some(xsd)) if ns == xsd => {
136                // Explicitly in XSD namespace
137                xsd_attrs.push(attr);
138            }
139            _ => {
140                // Foreign attribute
141                foreign_attrs.push(ForeignAttribute {
142                    namespace: attr.namespace,
143                    local_name: attr.local_name,
144                    prefix: attr.prefix,
145                    value: attr.value,
146                    source: attr.source,
147                });
148            }
149        }
150    }
151
152    (xsd_attrs, foreign_attrs)
153}
154
155/// Attribute lookup helper
156pub struct AttributeMap {
157    attrs: Vec<ParsedAttribute>,
158}
159
160impl AttributeMap {
161    /// Create from parsed attributes (XSD attributes only)
162    pub fn new(attrs: Vec<ParsedAttribute>) -> Self {
163        Self { attrs }
164    }
165
166    /// Get an attribute by local name
167    pub fn get(&self, name_id: NameId) -> Option<&ParsedAttribute> {
168        self.attrs.iter().find(|a| a.local_name == name_id)
169    }
170
171    /// Get an attribute value by local name
172    pub fn get_value(&self, name_id: NameId) -> Option<&str> {
173        self.get(name_id).map(|a| a.value.as_str())
174    }
175
176    /// Get an attribute value by local name string (looks up in name table)
177    pub fn get_value_by_name(&self, name_table: &NameTable, name: &str) -> Option<&str> {
178        let name_id = name_table.get(name)?;
179        self.get_value(name_id)
180    }
181
182    /// Check if an attribute exists
183    pub fn has(&self, name_id: NameId) -> bool {
184        self.attrs.iter().any(|a| a.local_name == name_id)
185    }
186
187    /// Get all attribute names
188    pub fn names(&self) -> impl Iterator<Item = NameId> + '_ {
189        self.attrs.iter().map(|a| a.local_name)
190    }
191
192    /// Remove an attribute and return it
193    pub fn take(&mut self, name_id: NameId) -> Option<ParsedAttribute> {
194        if let Some(pos) = self.attrs.iter().position(|a| a.local_name == name_id) {
195            Some(self.attrs.remove(pos))
196        } else {
197            None
198        }
199    }
200
201    /// Get remaining attributes (for detecting unknown attributes)
202    pub fn remaining(&self) -> &[ParsedAttribute] {
203        &self.attrs
204    }
205
206    /// Check if empty
207    pub fn is_empty(&self) -> bool {
208        self.attrs.is_empty()
209    }
210}
211
/// Parse the lexical form of an `xs:boolean` attribute value.
///
/// `xs:boolean` carries a fixed `whiteSpace=collapse` facet (XSD Part 2
/// §3.3.2), so surrounding whitespace is ignored and `" 1 "` is `true`.
/// [`str::trim`] is deliberately avoided: it removes every Unicode
/// whitespace character, whereas §4.3.6 defines the facet over
/// `#x20 #x9 #xA #xD` only. A value padded with anything else (for example
/// NBSP, `U+00A0`) is therefore still rejected.
///
/// Returns `Ok(true)` for `true`/`1`, `Ok(false)` for `false`/`0`, and an
/// error message quoting the original value otherwise.
pub fn parse_boolean(value: &str) -> Result<bool, String> {
    // The only characters the whiteSpace facet treats as whitespace.
    const XML_WHITESPACE: [char; 4] = [' ', '\t', '\n', '\r'];

    let lexical = value.trim_matches(&XML_WHITESPACE[..]);
    if lexical == "true" || lexical == "1" {
        Ok(true)
    } else if lexical == "false" || lexical == "0" {
        Ok(false)
    } else {
        Err(format!("Invalid boolean value: '{}'", value))
    }
}
227
/// Parse an occurrence count (minOccurs/maxOccurs)
///
/// Returns `Ok(None)` for `unbounded` and `Ok(Some(n))` for a non-negative
/// integer.
///
/// XSD `nonNegativeInteger` has no upper bound, so values larger than `u32::MAX`
/// are valid. We clamp them to `u32::MAX`; the compiler treats anything above
/// `MAX_COUNTED_OCCURS` (10 000) as effectively unbounded.
///
/// Like `xs:boolean`, the value space has `whiteSpace=collapse`, so leading
/// and trailing XML whitespace (`#x20 #x9 #xA #xD` only) is ignored. The
/// lexical space of `nonNegativeInteger` also permits an optional sign:
/// `+5` is `5` and `-0` is `0`, while any other negative number is rejected.
///
/// # Errors
///
/// Returns a message quoting the original value when it is neither
/// `unbounded` nor a valid non-negative integer.
pub fn parse_occurs(value: &str) -> Result<Option<u32>, String> {
    let invalid = || format!("Invalid occurrence value: '{}'", value);

    // Not `str::trim`: only the four XML whitespace characters are collapsible.
    let lexical = value.trim_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
    if lexical == "unbounded" {
        return Ok(None);
    }

    // Peel off at most one sign so the remainder can be checked as pure digits.
    let (negative, digits) = match lexical.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, lexical.strip_prefix('+').unwrap_or(lexical)),
    };
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return Err(invalid());
    }

    if negative {
        // Negative zero is the only negatively-signed value in range.
        return if digits.bytes().all(|b| b == b'0') {
            Ok(Some(0))
        } else {
            Err(invalid())
        };
    }

    // `digits` is a non-empty run of ASCII digits, so the only way parsing can
    // fail is overflow; clamp in that case.
    Ok(Some(digits.parse::<u32>().unwrap_or(u32::MAX)))
}
250
251/// Parse a use attribute (required/optional/prohibited)
252pub fn parse_use(value: &str) -> Result<crate::types::complex::AttributeUseKind, String> {
253    use crate::types::complex::AttributeUseKind;
254    match value {
255        "required" => Ok(AttributeUseKind::Required),
256        "optional" => Ok(AttributeUseKind::Optional),
257        "prohibited" => Ok(AttributeUseKind::Prohibited),
258        _ => Err(format!("Invalid use value: '{}'", value)),
259    }
260}
261
262/// Parse a processContents attribute
263pub fn parse_process_contents(value: &str) -> Result<crate::schema::ProcessContents, String> {
264    value
265        .parse()
266        .map_err(|_| format!("Invalid processContents value: '{}'", value))
267}
268
269/// Parse a form attribute (qualified/unqualified)
270pub fn parse_form(value: &str) -> Result<crate::schema::FormKind, String> {
271    match value {
272        "qualified" => Ok(crate::schema::FormKind::Qualified),
273        "unqualified" => Ok(crate::schema::FormKind::Unqualified),
274        _ => Err(format!("Invalid form value: '{}'", value)),
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an unqualified attribute with the given local name and value.
    fn unqualified(id: u32, value: &str) -> ParsedAttribute {
        ParsedAttribute {
            namespace: None,
            local_name: NameId(id),
            prefix: None,
            value: value.to_string(),
            source: None,
        }
    }

    #[test]
    fn test_parse_boolean() {
        assert_eq!(parse_boolean("true"), Ok(true));
        assert_eq!(parse_boolean("1"), Ok(true));
        assert_eq!(parse_boolean("false"), Ok(false));
        assert_eq!(parse_boolean("0"), Ok(false));
        assert!(parse_boolean("yes").is_err());
        assert!(parse_boolean("").is_err());
    }

    #[test]
    fn test_parse_boolean_whitespace() {
        // XML whitespace around the value is collapsed away.
        assert_eq!(parse_boolean(" 1 "), Ok(true));
        assert_eq!(parse_boolean("\t\nfalse\r "), Ok(false));
        // Non-XML whitespace such as NBSP is not, and interior whitespace never is.
        assert!(parse_boolean("\u{00A0}true").is_err());
        assert!(parse_boolean("tr ue").is_err());
    }

    #[test]
    fn test_parse_occurs() {
        assert_eq!(parse_occurs("0"), Ok(Some(0)));
        assert_eq!(parse_occurs("1"), Ok(Some(1)));
        assert_eq!(parse_occurs("100"), Ok(Some(100)));
        assert_eq!(parse_occurs("unbounded"), Ok(None));
        assert!(parse_occurs("invalid").is_err());
    }

    #[test]
    fn test_parse_occurs_edge_cases() {
        // Values beyond u32 are valid nonNegativeIntegers and get clamped.
        assert_eq!(parse_occurs("4294967295"), Ok(Some(u32::MAX)));
        assert_eq!(parse_occurs("99999999999"), Ok(Some(u32::MAX)));
        assert!(parse_occurs("").is_err());
        assert!(parse_occurs("-1").is_err());
        assert!(parse_occurs("1.5").is_err());
    }

    #[test]
    fn test_parse_use() {
        use crate::types::complex::AttributeUseKind;
        assert_eq!(parse_use("required"), Ok(AttributeUseKind::Required));
        assert_eq!(parse_use("optional"), Ok(AttributeUseKind::Optional));
        assert_eq!(parse_use("prohibited"), Ok(AttributeUseKind::Prohibited));
        assert!(parse_use("invalid").is_err());
    }

    #[test]
    fn test_parse_process_contents() {
        use crate::schema::ProcessContents;
        assert_eq!(
            parse_process_contents("strict"),
            Ok(ProcessContents::Strict)
        );
        assert_eq!(parse_process_contents("lax"), Ok(ProcessContents::Lax));
        assert_eq!(parse_process_contents("skip"), Ok(ProcessContents::Skip));
        assert!(parse_process_contents("invalid").is_err());
    }

    #[test]
    fn test_parse_form() {
        use crate::schema::FormKind;
        assert_eq!(parse_form("qualified"), Ok(FormKind::Qualified));
        assert_eq!(parse_form("unqualified"), Ok(FormKind::Unqualified));
        assert!(parse_form("invalid").is_err());
    }

    #[test]
    fn test_attribute_map() {
        let attrs = vec![unqualified(1, "value1"), unqualified(2, "value2")];

        let map = AttributeMap::new(attrs);
        assert!(map.has(NameId(1)));
        assert!(map.has(NameId(2)));
        assert!(!map.has(NameId(3)));

        assert_eq!(map.get_value(NameId(1)), Some("value1"));
        assert_eq!(map.get_value(NameId(2)), Some("value2"));
        assert_eq!(map.get_value(NameId(3)), None);

        assert_eq!(map.names().count(), 2);
        assert_eq!(map.remaining().len(), 2);
    }

    #[test]
    fn test_attribute_map_take() {
        let attrs = vec![unqualified(1, "value1")];

        let mut map = AttributeMap::new(attrs);
        assert!(!map.is_empty());

        // Taking an absent attribute leaves the map untouched.
        assert!(map.take(NameId(2)).is_none());
        assert!(!map.is_empty());

        let taken = map.take(NameId(1));
        assert!(taken.is_some());
        assert!(map.is_empty());

        // A second take of the same name finds nothing.
        assert!(map.take(NameId(1)).is_none());
    }
}
374}