Skip to main content

xsd_schema/namespace/
qname.rs

//! QName parsing and validation
//!
//! Provides QName parsing with NCName validation following XPath 2.0 / XSD semantics.
//! Failures are reported through `QNameError`:
//! - `InvalidLexical` (FORG0001): malformed QName syntax
//! - `UndefinedPrefix` (XPST0081): prefix not in scope
//! - `EmptyLocalName`: empty input, or nothing after the colon
6
7use super::context::{NamespaceContext, NamespaceContextSnapshot};
8use super::table::NameTable;
9use crate::ids::NameId;
10use std::fmt;
11
/// Qualified name with interned strings via NameTable
///
/// A QName consists of:
/// - Optional prefix (e.g., "xs" in "xs:string")
/// - Local name (e.g., "string" in "xs:string")
/// - Resolved namespace URI (e.g., XSD namespace)
///
/// All three parts are stored as `NameId` handles rather than strings.
///
/// Note that `PartialEq`, `Eq` and `Hash` are implemented by hand in this module
/// (not derived): they look only at `namespace_uri` and `local_name`, so two
/// values that differ solely in `prefix` compare equal and hash identically.
#[derive(Debug, Clone)]
pub struct QualifiedName {
    /// Namespace URI (None = no namespace)
    pub namespace_uri: Option<NameId>,
    /// Local name part
    pub local_name: NameId,
    /// Original prefix (None = unprefixed); kept for reference only and
    /// ignored by equality and hashing
    pub prefix: Option<NameId>,
}
27
28/// QName equality is defined by namespace URI + local name only (per XML Namespaces).
29/// The prefix is a syntactic artifact and does not affect identity.
30impl PartialEq for QualifiedName {
31    fn eq(&self, other: &Self) -> bool {
32        self.namespace_uri == other.namespace_uri && self.local_name == other.local_name
33    }
34}
35
/// Marker impl: declares the hand-written `PartialEq` (namespace URI + local
/// name, prefix ignored) to be a full equivalence relation. Together with the
/// manual `Hash` impl this is what allows `QualifiedName` to be used as a key
/// in hashed collections.
impl Eq for QualifiedName {}
37
38impl std::hash::Hash for QualifiedName {
39    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
40        self.namespace_uri.hash(state);
41        self.local_name.hash(state);
42    }
43}
44
45impl QualifiedName {
46    /// Create a new QualifiedName
47    pub fn new(namespace_uri: Option<NameId>, local_name: NameId, prefix: Option<NameId>) -> Self {
48        Self {
49            namespace_uri,
50            local_name,
51            prefix,
52        }
53    }
54
55    /// Create a QualifiedName with no namespace
56    pub fn local(local_name: NameId) -> Self {
57        Self {
58            namespace_uri: None,
59            local_name,
60            prefix: None,
61        }
62    }
63
64    /// Check if this QName has a namespace
65    pub fn has_namespace(&self) -> bool {
66        self.namespace_uri.is_some()
67    }
68
69    /// Check if this QName is prefixed
70    pub fn is_prefixed(&self) -> bool {
71        self.prefix.is_some()
72    }
73}
74
/// Error type for QName parsing
///
/// Returned by `parse_qname` and `parse_qname_with_snapshot`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QNameError {
    /// Invalid lexical form (FORG0001)
    ///
    /// Carries the offending QName text (after surrounding whitespace has been
    /// trimmed). Raised for a leading colon, more than one colon, or a prefix /
    /// local part that is not a valid NCName.
    InvalidLexical(String),
    /// Undefined prefix (XPST0081)
    ///
    /// Carries the prefix that has no binding in the namespace context.
    UndefinedPrefix(String),
    /// Empty local name
    ///
    /// Raised when the input is empty (or only whitespace) and when nothing
    /// follows the colon, e.g. `"xs:"`.
    EmptyLocalName,
}
85
86impl fmt::Display for QNameError {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        match self {
89            QNameError::InvalidLexical(s) => write!(f, "Invalid QName syntax: '{}'", s),
90            QNameError::UndefinedPrefix(p) => write!(f, "Undefined prefix: '{}'", p),
91            QNameError::EmptyLocalName => write!(f, "Empty local name in QName"),
92        }
93    }
94}
95
/// Makes `QNameError` a standard error type (usable as `dyn std::error::Error`).
/// No methods are overridden, so the trait's default behavior applies; the
/// message comes from the `Display` impl above.
impl std::error::Error for QNameError {}
97
98/// Parse a QName string into its components
99///
100/// # Arguments
101///
102/// * `qname` - The QName string to parse (e.g., "xs:string" or "localName")
103/// * `ns_context` - Namespace context for prefix resolution (mutable for string interning)
104/// * `use_default_ns` - Whether to use default namespace for unprefixed names
105///
106/// # Returns
107///
108/// A `QualifiedName` with resolved namespace, or an error.
109///
110/// # Errors
111///
112/// - `InvalidLexical` if the QName syntax is invalid
113/// - `UndefinedPrefix` if the prefix is not bound in the namespace context
114pub fn parse_qname(
115    qname: &str,
116    ns_context: &mut NamespaceContext,
117    use_default_ns: bool,
118) -> Result<QualifiedName, QNameError> {
119    let qname = qname.trim();
120
121    if qname.is_empty() {
122        return Err(QNameError::EmptyLocalName);
123    }
124
125    // Split on ':' to find prefix
126    let (prefix_str, local_str) = match qname.find(':') {
127        Some(pos) => {
128            if pos == 0 {
129                return Err(QNameError::InvalidLexical(qname.to_string()));
130            }
131            let prefix = &qname[..pos];
132            let local = &qname[pos + 1..];
133
134            // Check for multiple colons
135            if local.contains(':') {
136                return Err(QNameError::InvalidLexical(qname.to_string()));
137            }
138
139            (Some(prefix), local)
140        }
141        None => (None, qname),
142    };
143
144    // Validate local name
145    if local_str.is_empty() {
146        return Err(QNameError::EmptyLocalName);
147    }
148
149    if !is_ncname(local_str) {
150        return Err(QNameError::InvalidLexical(qname.to_string()));
151    }
152
153    // Validate and resolve prefix
154    let (namespace_uri, prefix_id) = match prefix_str {
155        Some(prefix) => {
156            if !is_ncname(prefix) {
157                return Err(QNameError::InvalidLexical(qname.to_string()));
158            }
159
160            let prefix_id = ns_context.name_table_mut().add(prefix);
161            match ns_context.lookup_namespace_by_id(prefix_id) {
162                Some(ns_id) => (Some(ns_id), Some(prefix_id)),
163                None => return Err(QNameError::UndefinedPrefix(prefix.to_string())),
164            }
165        }
166        None => {
167            // Unprefixed name - use default namespace if requested
168            let namespace_uri = if use_default_ns {
169                ns_context.default_namespace()
170            } else {
171                None
172            };
173            (namespace_uri, None)
174        }
175    };
176
177    let local_id = ns_context.name_table_mut().add(local_str);
178
179    Ok(QualifiedName::new(namespace_uri, local_id, prefix_id))
180}
181
182/// Parse a QName string using an immutable `NamespaceContextSnapshot` + `NameTable`
183///
184/// Same parsing/validation logic as [`parse_qname`] but works with a snapshot
185/// of namespace bindings instead of a mutable `NamespaceContext`. This is useful
186/// during validation where only a snapshot is available (e.g., xsi:type resolution).
187///
188/// Uses `name_table.add()` to intern names, since instance values may contain
189/// names not yet present in the table.
190///
191/// # Arguments
192///
193/// * `qname` - The QName string to parse (e.g., "xs:string" or "localName")
194/// * `ns_snapshot` - Snapshot of namespace bindings for prefix resolution
195/// * `name_table` - Name table for string interning
196/// * `use_default_ns` - Whether to use default namespace for unprefixed names
197///
198/// # Errors
199///
200/// - `InvalidLexical` if the QName syntax is invalid
201/// - `UndefinedPrefix` if the prefix is not bound in the snapshot
202pub fn parse_qname_with_snapshot(
203    qname: &str,
204    ns_snapshot: &NamespaceContextSnapshot,
205    name_table: &NameTable,
206    use_default_ns: bool,
207) -> Result<QualifiedName, QNameError> {
208    let qname = qname.trim();
209
210    if qname.is_empty() {
211        return Err(QNameError::EmptyLocalName);
212    }
213
214    // Split on ':' to find prefix
215    let (prefix_str, local_str) = match qname.find(':') {
216        Some(pos) => {
217            if pos == 0 {
218                return Err(QNameError::InvalidLexical(qname.to_string()));
219            }
220            let prefix = &qname[..pos];
221            let local = &qname[pos + 1..];
222
223            // Check for multiple colons
224            if local.contains(':') {
225                return Err(QNameError::InvalidLexical(qname.to_string()));
226            }
227
228            (Some(prefix), local)
229        }
230        None => (None, qname),
231    };
232
233    // Validate local name
234    if local_str.is_empty() {
235        return Err(QNameError::EmptyLocalName);
236    }
237
238    if !is_ncname(local_str) {
239        return Err(QNameError::InvalidLexical(qname.to_string()));
240    }
241
242    // Validate and resolve prefix
243    let (namespace_uri, prefix_id) = match prefix_str {
244        Some(prefix) => {
245            if !is_ncname(prefix) {
246                return Err(QNameError::InvalidLexical(qname.to_string()));
247            }
248
249            let prefix_id = name_table.add(prefix);
250            match ns_snapshot.resolve_prefix(prefix_id) {
251                Some(ns_id) => (Some(ns_id), Some(prefix_id)),
252                None => return Err(QNameError::UndefinedPrefix(prefix.to_string())),
253            }
254        }
255        None => {
256            // Unprefixed name - use default namespace if requested
257            let namespace_uri = if use_default_ns {
258                ns_snapshot.default_namespace()
259            } else {
260                None
261            };
262            (namespace_uri, None)
263        }
264    };
265
266    let local_id = name_table.add(local_str);
267
268    Ok(QualifiedName::new(namespace_uri, local_id, prefix_id))
269}
270
271/// Check if a string is a valid NCName (non-colonized name)
272///
273/// NCName = Name - ':'
274/// Simplified check: start with letter or '_', followed by letters, digits, '.', '-', '_'
275pub fn is_ncname(s: &str) -> bool {
276    if s.is_empty() {
277        return false;
278    }
279
280    let mut chars = s.chars();
281
282    // First character must be NameStartChar (excluding ':')
283    match chars.next() {
284        Some(c) if is_name_start_char(c) => {}
285        _ => return false,
286    }
287
288    // Remaining characters must be NameChar (excluding ':')
289    for c in chars {
290        if !is_name_char(c) {
291            return false;
292        }
293    }
294
295    true
296}
297
/// Check if a character is a valid NameStartChar (per XML spec, excluding ':')
///
/// ASCII input is decided directly (letters and '_'); everything else is looked
/// up in the table of inclusive non-ASCII ranges below.
fn is_name_start_char(c: char) -> bool {
    /// Inclusive `(first, last)` bounds of the non-ASCII NameStartChar ranges.
    const NON_ASCII_RANGES: [(char, char); 12] = [
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{2FF}'),
        ('\u{370}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    if c.is_ascii() {
        return c == '_' || c.is_ascii_alphabetic();
    }

    NON_ASCII_RANGES
        .iter()
        .any(|&(first, last)| first <= c && c <= last)
}
318
319/// Check if a character is a valid NameChar (per XML spec, excluding ':')
320fn is_name_char(c: char) -> bool {
321    is_name_start_char(c)
322        || matches!(c,
323            '-' |
324            '.' |
325            '0'..='9' |
326            '\u{B7}' |
327            '\u{0300}'..='\u{036F}' |
328            '\u{203F}'..='\u{2040}'
329        )
330}
331
/// Unit tests for NCName validation, `QualifiedName` construction/identity,
/// `QNameError` formatting and snapshot-based QName parsing.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_ncname_valid() {
        assert!(is_ncname("foo"));
        assert!(is_ncname("_bar"));
        assert!(is_ncname("foo123"));
        assert!(is_ncname("foo-bar"));
        assert!(is_ncname("foo.bar"));
        assert!(is_ncname("foo_bar"));
        assert!(is_ncname("Élément")); // Unicode
    }

    #[test]
    fn test_is_ncname_invalid() {
        assert!(!is_ncname("")); // Empty
        assert!(!is_ncname("123foo")); // Starts with digit
        assert!(!is_ncname("-foo")); // Starts with hyphen
        assert!(!is_ncname(".foo")); // Starts with dot
        assert!(!is_ncname("foo:bar")); // Contains colon
        assert!(!is_ncname("foo bar")); // Contains space
    }

    #[test]
    fn test_qualified_name_local() {
        let local = QualifiedName::local(NameId(1));
        assert!(!local.has_namespace());
        assert!(!local.is_prefixed());
    }

    #[test]
    fn test_qualified_name_prefixed() {
        let qn = QualifiedName::new(Some(NameId(1)), NameId(2), Some(NameId(3)));
        assert!(qn.has_namespace());
        assert!(qn.is_prefixed());
    }

    #[test]
    fn test_qualified_name_equality_ignores_prefix() {
        let with_prefix = QualifiedName::new(Some(NameId(1)), NameId(2), Some(NameId(3)));
        let without_prefix = QualifiedName::new(Some(NameId(1)), NameId(2), None);
        // Same namespace + local name => equal, whatever the prefix.
        assert_eq!(with_prefix, without_prefix);
        // Same local name but different namespace => not equal.
        assert_ne!(with_prefix, QualifiedName::local(NameId(2)));
        // Same namespace but different local name => not equal.
        assert_ne!(
            with_prefix,
            QualifiedName::new(Some(NameId(1)), NameId(4), Some(NameId(3)))
        );
    }

    #[test]
    fn test_qname_error_display() {
        assert_eq!(
            QNameError::InvalidLexical("a:b:c".to_string()).to_string(),
            "Invalid QName syntax: 'a:b:c'"
        );
        assert_eq!(
            QNameError::UndefinedPrefix("nope".to_string()).to_string(),
            "Undefined prefix: 'nope'"
        );
        assert_eq!(
            QNameError::EmptyLocalName.to_string(),
            "Empty local name in QName"
        );
    }

    // --- parse_qname_with_snapshot tests ---

    /// Helper: create a NameTable + NamespaceContextSnapshot with given bindings
    ///
    /// `prefixes` holds `(prefix, uri)` pairs; `default_ns`, when given, is
    /// registered under the empty prefix.
    fn make_snapshot(
        prefixes: &[(&str, &str)],
        default_ns: Option<&str>,
    ) -> (NameTable, NamespaceContextSnapshot) {
        use super::super::context::NamespaceContext;
        let mut table = NameTable::new();
        let mut ctx = NamespaceContext::new(&mut table);
        ctx.push_scope();
        for &(prefix, uri) in prefixes {
            ctx.add_namespace(prefix, uri);
        }
        if let Some(uri) = default_ns {
            ctx.add_namespace("", uri);
        }
        let snapshot = ctx.snapshot();
        // Release the context (it was built from `&mut table`) before handing the table back.
        drop(ctx);
        (table, snapshot)
    }

    #[test]
    fn test_snapshot_prefixed_qname() {
        let (table, snapshot) = make_snapshot(&[("xs", "http://www.w3.org/2001/XMLSchema")], None);
        let result = parse_qname_with_snapshot("xs:string", &snapshot, &table, true).unwrap();
        assert_eq!(table.resolve(result.local_name), "string");
        assert!(result.prefix.is_some());
        assert_eq!(table.resolve(result.prefix.unwrap()), "xs");
        assert!(result.namespace_uri.is_some());
        assert_eq!(
            table.resolve(result.namespace_uri.unwrap()),
            "http://www.w3.org/2001/XMLSchema"
        );
    }

    #[test]
    fn test_snapshot_unprefixed_with_default_ns() {
        let (table, snapshot) = make_snapshot(&[], Some("http://default.com"));
        let result = parse_qname_with_snapshot("localName", &snapshot, &table, true).unwrap();
        assert_eq!(table.resolve(result.local_name), "localName");
        assert!(result.prefix.is_none());
        assert!(result.namespace_uri.is_some());
        assert_eq!(
            table.resolve(result.namespace_uri.unwrap()),
            "http://default.com"
        );
    }

    #[test]
    fn test_snapshot_unprefixed_without_default_ns() {
        let (table, snapshot) = make_snapshot(&[], None);
        let result = parse_qname_with_snapshot("localName", &snapshot, &table, true).unwrap();
        assert_eq!(table.resolve(result.local_name), "localName");
        assert!(result.namespace_uri.is_none());
    }

    #[test]
    fn test_snapshot_unprefixed_default_ns_not_used() {
        let (table, snapshot) = make_snapshot(&[], Some("http://default.com"));
        // use_default_ns = false => namespace should be None
        let result = parse_qname_with_snapshot("localName", &snapshot, &table, false).unwrap();
        assert!(result.namespace_uri.is_none());
    }

    #[test]
    fn test_snapshot_invalid_ncname_local() {
        let (table, snapshot) = make_snapshot(&[("xs", "http://www.w3.org/2001/XMLSchema")], None);
        let err = parse_qname_with_snapshot("xs:123bad", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::InvalidLexical(_)));
    }

    #[test]
    fn test_snapshot_invalid_ncname_prefix() {
        let (table, snapshot) = make_snapshot(&[], None);
        let err = parse_qname_with_snapshot("123:foo", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::InvalidLexical(_)));
    }

    #[test]
    fn test_snapshot_undefined_prefix() {
        let (table, snapshot) = make_snapshot(&[], None);
        let err = parse_qname_with_snapshot("nope:foo", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::UndefinedPrefix(_)));
    }

    #[test]
    fn test_snapshot_empty_input() {
        let (table, snapshot) = make_snapshot(&[], None);
        let err = parse_qname_with_snapshot("", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::EmptyLocalName));
    }

    #[test]
    fn test_snapshot_whitespace_only_input() {
        let (table, snapshot) = make_snapshot(&[], None);
        let err = parse_qname_with_snapshot("  \t\n", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::EmptyLocalName));
    }

    #[test]
    fn test_snapshot_leading_colon() {
        let (table, snapshot) = make_snapshot(&[], None);
        let err = parse_qname_with_snapshot(":foo", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::InvalidLexical(_)));
    }

    #[test]
    fn test_snapshot_trailing_colon() {
        let (table, snapshot) = make_snapshot(&[("xs", "http://www.w3.org/2001/XMLSchema")], None);
        // Nothing after the colon is reported as an empty local name, not as a prefix problem.
        let err = parse_qname_with_snapshot("xs:", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::EmptyLocalName));
    }

    #[test]
    fn test_snapshot_multiple_colons() {
        let (table, snapshot) = make_snapshot(&[("a", "http://example.com/a")], None);
        let err = parse_qname_with_snapshot("a:b:c", &snapshot, &table, true).unwrap_err();
        assert!(matches!(err, QNameError::InvalidLexical(_)));
    }

    #[test]
    fn test_snapshot_whitespace_trimmed() {
        let (table, snapshot) = make_snapshot(&[("xs", "http://www.w3.org/2001/XMLSchema")], None);
        let result = parse_qname_with_snapshot("  xs:string  ", &snapshot, &table, true).unwrap();
        assert_eq!(table.resolve(result.local_name), "string");
    }
}