anyxml/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod automata;
4pub mod catalog;
5pub mod encoding;
6pub mod error;
7mod parse;
8mod save;
9pub mod sax;
10pub mod stax;
11pub mod tree;
12pub mod uri;
13pub mod xpath;
14
15use std::marker::PhantomData;
16
17use crate::sax::{parser::ParserSubState, source::InputSource};
18
/// Maximum length of XML version numbers accepted by the parser
// NOTE(review): the unit (bytes vs. characters) is not visible here -- confirm at the use site.
const XML_VERSION_NUM_LIMIT_LENGTH: usize = 128;
/// Maximum length of encoding names accepted by the parser
// NOTE(review): the unit (bytes vs. characters) is not visible here -- confirm at the use site.
const ENCODING_NAME_LIMIT_LENGTH: usize = 128;
/// Approximate chunk length when the parser reports character data
const CHARDATA_CHUNK_LENGTH: usize = 4096;

/// Namespace name that "Namespaces in XML" binds to the reserved `xml` prefix
const XML_XML_NAMESPACE: &str = "http://www.w3.org/XML/1998/namespace";
/// Namespace name that "Namespaces in XML" binds to the reserved `xmlns` prefix
const XML_NS_NAMESPACE: &str = "http://www.w3.org/2000/xmlns/";
28
/// Type-level description of a parser variant.
///
/// An implementor only selects two associated types; the trait has no methods.
/// See [`DefaultParserSpec`] and [`ProgressiveParserSpec`] for the implementations
/// provided in this file.
pub trait ParserSpec {
    /// Reader type selected by this variant.
    type Reader;
    /// Variant-specific context type (`()` when the variant needs none).
    type SpecificContext;
}
33
/// [`ParserSpec`] whose reader is an [`InputSource`] with lifetime `'a` and which
/// carries no variant-specific context.
pub struct DefaultParserSpec<'a> {
    // Zero-sized marker: the struct stores no data, it only needs to mention `'a`.
    _phantom: PhantomData<&'a ()>,
}

impl<'a> ParserSpec for DefaultParserSpec<'a> {
    type Reader = InputSource<'a>;
    // No variant-specific state.
    type SpecificContext = ();
}
42
/// [`ParserSpec`] that pairs an `InputSource<'static>` reader with a
/// [`ProgressiveParserSpecificContext`].
pub struct ProgressiveParserSpec;

impl ParserSpec for ProgressiveParserSpec {
    // The reader is fixed to the `'static` lifetime for this variant.
    type Reader = InputSource<'static>;
    type SpecificContext = ProgressiveParserSpecificContext;
}
49
/// Variant-specific context selected by [`ProgressiveParserSpec`].
///
/// `Default` produces zeroed integers, empty stacks and the default
/// `ParserSubState`.
#[derive(Debug, Default)]
pub struct ProgressiveParserSpecificContext {
    // NOTE(review): presumably a count of input already examined -- confirm in the progressive parser.
    pub(crate) seen: usize,
    // NOTE(review): presumably the quote byte of the literal currently being read -- confirm.
    pub(crate) quote: u8,
    // NOTE(review): meaning is defined by `ParserSubState` in `sax::parser`, not visible here.
    pub(crate) sub_state: ParserSubState,
    /// One entry per element: (QName, prefix length, namespace stack length)
    pub(crate) element_stack: Vec<(String, usize, usize)>,
    /// One entry per entity: (old element stack length, old xml version, old encoding)
    pub(crate) entity_stack: Vec<(usize, XMLVersion, Option<String>)>,
}
60
/// XML version selector.
///
/// The default value is [`XMLVersion::XML10`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Hash)]
pub enum XMLVersion {
    /// XML 1.0
    #[default]
    XML10,
    /// Unknown version. Treat as specified in XML 1.0.
    Unknown,
}
69
70impl XMLVersion {
71    pub fn is_char(&self, c: impl Into<u32>) -> bool {
72        fn _is_char(_version: XMLVersion, c: u32) -> bool {
73            matches!(
74                c,
75                0x9
76                    | 0xA
77                    | 0xD
78                    | 0x20..= 0xD7FF
79                    | 0xE000..= 0xFFFD
80                    | 0x10000..= 0x10FFFF
81            )
82        }
83        _is_char(*self, c.into())
84    }
85
86    pub fn is_name_start_char(&self, c: impl Into<u32>) -> bool {
87        fn _is_name_start_char(_version: XMLVersion, c: u32) -> bool {
88            matches!(c,
89                0x3A // ':'
90                | 0x41..=0x5A // 'A'..='Z'
91                | 0x5F // '_'
92                | 0x61..=0x7A // 'a'..='z'
93                | 0xC0..=0xD6
94                | 0xD8..=0xF6
95                | 0xF8..=0x2FF
96                | 0x370..=0x37D
97                | 0x37F..=0x1FFF
98                | 0x200C..=0x200D
99                | 0x2070..=0x218F
100                | 0x2C00..=0x2FEF
101                | 0x3001..=0xD7FF
102                | 0xF900..=0xFDCF
103                | 0xFDF0..=0xFFFD
104                | 0x10000..=0xEFFFF
105            )
106        }
107        _is_name_start_char(*self, c.into())
108    }
109
110    pub fn is_name_char(&self, c: impl Into<u32>) -> bool {
111        fn _is_name_char(_version: XMLVersion, c: u32) -> bool {
112            matches!(c,
113                0x2D..=0x2E // '-', '.'
114                | 0x30..=0x3A // '0'..='9', ':'
115                | 0x41..=0x5A // 'A'..='Z'
116                | 0x5F // '_'
117                | 0x61..=0x7A // 'a'..='z'
118                | 0xB7
119                | 0xC0..=0xD6
120                | 0xD8..=0xF6
121                | 0xF8..=0x37D
122                | 0x37F..=0x1FFF
123                | 0x200C..=0x200D
124                | 0x203F..=0x2040
125                | 0x2070..=0x218F
126                | 0x2C00..=0x2FEF
127                | 0x3001..=0xD7FF
128                | 0xF900..=0xFDCF
129                | 0xFDF0..=0xFFFD
130                | 0x10000..=0xEFFFF
131            )
132        }
133        _is_name_char(*self, c.into())
134    }
135
136    /// ```text
137    /// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
138    /// ```
139    pub fn is_pubid_char(&self, c: impl Into<u32>) -> bool {
140        fn _is_pubid_char(_version: XMLVersion, c: u32) -> bool {
141            matches!(c,
142                0xA
143                | 0xD
144                | 0x20..=0x21 // SP, '!'
145                | 0x23..=0x25 // [#$%]
146                | 0x27..=0x3B // ['()*+,-./], '0'..='9', [:;]
147                | 0x3D // '='
148                | 0x3F..=0x5A // [?@], 'A'..='Z'
149                | 0x5F // '_'
150                | 0x61..=0x7A // 'a'..='z'
151            )
152        }
153        _is_pubid_char(*self, c.into())
154    }
155
156    pub fn is_whitespace(&self, c: impl Into<u32>) -> bool {
157        let c: u32 = c.into();
158        matches!(c, 0x20 | 0x9 | 0xD | 0xA)
159    }
160}
161
162impl std::fmt::Display for XMLVersion {
163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
164        match *self {
165            XMLVersion::XML10 => write!(f, "1.0"),
166            XMLVersion::Unknown => write!(f, "1.0"),
167        }
168    }
169}