1#![doc = include_str!("../README.md")]
2
3pub mod automata;
4pub mod catalog;
5pub mod encoding;
6pub mod error;
7mod parse;
8mod save;
9pub mod sax;
10pub mod stax;
11pub mod tree;
12pub mod uri;
13pub mod xpath;
14
15use std::marker::PhantomData;
16
17use crate::sax::{parser::ParserSubState, source::InputSource};
18
/// Length limit (128) associated with the XML version number.
// NOTE(review): presumably the maximum accepted length of the version string in an
// XML declaration; the unit (bytes vs. characters) is not visible here — confirm at use sites.
const XML_VERSION_NUM_LIMIT_LENGTH: usize = 128;
/// Length limit (128) associated with encoding names.
// NOTE(review): presumably the maximum accepted length of an encoding name in an
// XML/text declaration — confirm at use sites.
const ENCODING_NAME_LIMIT_LENGTH: usize = 128;
/// Chunk length (4096) associated with character data.
// NOTE(review): presumably the size at which character data is split while parsing —
// confirm at use sites.
const CHARDATA_CHUNK_LENGTH: usize = 4096;

/// Namespace name reserved for the `xml` prefix by "Namespaces in XML".
const XML_XML_NAMESPACE: &str = "http://www.w3.org/XML/1998/namespace";
/// Namespace name reserved for the `xmlns` prefix by "Namespaces in XML".
const XML_NS_NAMESPACE: &str = "http://www.w3.org/2000/xmlns/";
28
/// Type-level description of a parser flavor.
///
/// An implementor only names two associated types; the trait declares no methods.
/// How the parser consumes these types is defined outside this file.
pub trait ParserSpec {
    /// Reader type for this flavor (both implementors in this file use `InputSource`).
    type Reader;
    /// Flavor-specific context type (`()` when a flavor carries nothing extra).
    type SpecificContext;
}
33
/// Marker type selecting the default parser flavor.
///
/// It stores no data: the lifetime `'a` is carried only through `PhantomData`
/// so that the `ParserSpec` implementation can name `InputSource<'a>`.
pub struct DefaultParserSpec<'a> {
    // Zero-sized; exists solely to tie the lifetime parameter to the type.
    _phantom: PhantomData<&'a ()>,
}
37
/// Default flavor: reads from an `InputSource` borrowed for `'a` and carries no
/// flavor-specific context.
impl<'a> ParserSpec for DefaultParserSpec<'a> {
    type Reader = InputSource<'a>;
    // Unit type: nothing extra is carried for this flavor.
    type SpecificContext = ();
}
42
/// Marker type selecting the progressive parser flavor (see its `ParserSpec` impl).
pub struct ProgressiveParserSpec;
44
/// Progressive flavor: reads from an `InputSource<'static>` and carries a
/// `ProgressiveParserSpecificContext` as its flavor-specific context.
impl ParserSpec for ProgressiveParserSpec {
    type Reader = InputSource<'static>;
    type SpecificContext = ProgressiveParserSpecificContext;
}
49
/// Flavor-specific context used as `ProgressiveParserSpec::SpecificContext`.
///
/// All fields are crate-private; `Default` yields zeroed counters, empty stacks and
/// the default `ParserSubState`.
#[derive(Debug, Default)]
pub struct ProgressiveParserSpecificContext {
    // NOTE(review): presumably the amount of buffered input already examined — confirm.
    pub(crate) seen: usize,
    // NOTE(review): presumably the quote byte (`"` or `'`) of a literal in progress,
    // with 0 meaning none — confirm against the parser.
    pub(crate) quote: u8,
    // Sub-state of the parser; the meaning of each state is defined in `sax::parser`.
    pub(crate) sub_state: ParserSubState,
    // NOTE(review): one entry per open element, presumably a name plus two
    // lengths/offsets — the meaning of the two `usize` values is not visible here.
    pub(crate) element_stack: Vec<(String, usize, usize)>,
    // NOTE(review): one entry per entity being processed, presumably recording state
    // (a count/offset, XML version, optional string) to restore afterwards — confirm.
    pub(crate) entity_stack: Vec<(usize, XMLVersion, Option<String>)>,
}
60
/// XML specification version used when classifying characters.
///
/// Every classification method currently applies the same table for both variants;
/// the receiver is not consulted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Hash)]
pub enum XMLVersion {
    /// XML 1.0 (the default variant).
    #[default]
    XML10,
    /// Any other version; formatted as `"1.0"` by the `Display` impl.
    Unknown,
}

impl XMLVersion {
    /// Returns `true` if `c` matches the XML 1.0 `Char` production.
    ///
    /// `c` may be anything convertible to `u32` (for example `char`, `u8` or `u32`).
    pub fn is_char(&self, c: impl Into<u32>) -> bool {
        let code: u32 = c.into();
        matches!(
            code,
            // TAB, LF, CR
            0x9 | 0xA | 0xD
            // Everything from SPACE up to (excluding) the surrogate block.
            | 0x20..=0xD7FF
            // After the surrogates, excluding U+FFFE and U+FFFF.
            | 0xE000..=0xFFFD
            // Supplementary planes.
            | 0x10000..=0x10FFFF
        )
    }

    /// Returns `true` if `c` matches the XML 1.0 `NameStartChar` production.
    ///
    /// `c` may be anything convertible to `u32` (for example `char`, `u8` or `u32`).
    pub fn is_name_start_char(&self, c: impl Into<u32>) -> bool {
        let code: u32 = c.into();
        matches!(
            code,
            // ':'
            0x3A
            // 'A'..='Z'
            | 0x41..=0x5A
            // '_'
            | 0x5F
            // 'a'..='z'
            | 0x61..=0x7A
            // Non-ASCII ranges, in ascending order.
            | 0xC0..=0xD6
            | 0xD8..=0xF6
            | 0xF8..=0x2FF
            | 0x370..=0x37D
            | 0x37F..=0x1FFF
            | 0x200C..=0x200D
            | 0x2070..=0x218F
            | 0x2C00..=0x2FEF
            | 0x3001..=0xD7FF
            | 0xF900..=0xFDCF
            | 0xFDF0..=0xFFFD
            | 0x10000..=0xEFFFF
        )
    }

    /// Returns `true` if `c` matches the XML 1.0 `NameChar` production.
    ///
    /// A name character is any name-start character plus `-`, `.`, the ASCII digits,
    /// U+00B7, U+0300..=U+036F and U+203F..=U+2040.
    pub fn is_name_char(&self, c: impl Into<u32>) -> bool {
        let code: u32 = c.into();
        self.is_name_start_char(code)
            || matches!(
                code,
                // '-' and '.'
                0x2D | 0x2E
                // '0'..='9'
                | 0x30..=0x39
                // MIDDLE DOT
                | 0xB7
                // Combining diacritical marks.
                | 0x300..=0x36F
                // UNDERTIE and CHARACTER TIE
                | 0x203F..=0x2040
            )
    }

    /// Returns `true` if `c` matches the XML 1.0 `PubidChar` production.
    ///
    /// `c` may be anything convertible to `u32` (for example `char`, `u8` or `u32`).
    pub fn is_pubid_char(&self, c: impl Into<u32>) -> bool {
        let code: u32 = c.into();
        matches!(
            code,
            // LF, CR
            0xA | 0xD
            // ' ' and '!'
            | 0x20..=0x21
            // '#', '$', '%'
            | 0x23..=0x25
            // '\'' through ';' (punctuation, digits, ':' and ';')
            | 0x27..=0x3B
            // '='
            | 0x3D
            // '?', '@' and 'A'..='Z'
            | 0x3F..=0x5A
            // '_'
            | 0x5F
            // 'a'..='z'
            | 0x61..=0x7A
        )
    }

    /// Returns `true` if `c` is TAB, LF, CR or SPACE (the XML `S` characters).
    pub fn is_whitespace(&self, c: impl Into<u32>) -> bool {
        let code: u32 = c.into();
        matches!(code, 0x9 | 0xA | 0xD | 0x20)
    }
}

/// Formats the version as the string used for a version number.
impl std::fmt::Display for XMLVersion {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both variants are rendered identically.
        let text = match self {
            Self::XML10 | Self::Unknown => "1.0",
        };
        f.write_str(text)
    }
}