1
2use super::{
3 entities::{Entity,Instance,ENTITIES},
4};
5use crate::{
6 Error, Local, ParserEvent,
7 NextResult, Next, StateMachine,
8 SourceEvent, Breaker,
9};
10
11
12#[derive(Debug)]
13pub(in super) enum EntityState {
14 Init,
16 MayBeEntity(Local<char>),
17 MayBeNumEntity(Local<char>,Local<char>),
18 EntityNamed(ReadEntity),
19 EntityNumber(ReadEntity),
20 EntityNumberX(ReadEntity),
21}
22impl Default for EntityState {
23 fn default() -> EntityState {
24 EntityState::Init
25 }
26}
27
28#[derive(Debug)]
29pub(in super) struct ReadEntity {
30 begin: Local<char>,
31 current: Local<char>,
32 content: String,
33 chars: Vec<Local<char>>,
34}
35impl ReadEntity {
36 fn named_into_state(self) -> NextResult<EntityState,Entity> {
37 let mut ns = Next::empty();
38 match ENTITIES.get(&self.content) {
39 Some(e) => ns = ns.with_event(create_entity_event(self,e)?),
40 None => for c in self.chars {
41 ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
42 },
43 }
44 Ok(ns)
45 }
46 fn number_into_state(self) -> NextResult<EntityState,Entity> {
47 let mut ns = Next::empty();
48 match match u32::from_str_radix(&self.content,10) {
49 Ok(u) => char::from_u32(u),
50 Err(_) => None,
51 } {
52 Some(e) => ns = ns.with_event(create_entity_event(self,Instance::Char(e))?),
53 None => for c in self.chars {
54 ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
55 },
56 }
57 Ok(ns)
58 }
59 fn number_x_into_state(self) -> NextResult<EntityState,Entity> {
60 let mut ns = Next::empty();
61 match match u32::from_str_radix(&self.content,16) {
62 Ok(u) => char::from_u32(u),
63 Err(_) => None,
64 } {
65 Some(e) => ns = ns.with_event(create_entity_event(self,Instance::Char(e))?),
66 None => for c in self.chars {
67 ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
68 },
69 }
70 Ok(ns)
71 }
72 fn failed_into_state(self) -> NextResult<EntityState,Entity> {
73 let mut ns = Next::empty();
74 for c in self.chars {
75 ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
76 }
77 Ok(ns)
78 }
79}
80
81impl StateMachine for EntityState {
82 type Context = ();
83 type Data = Entity;
84
85 fn eof(self, _: &Self::Context) -> NextResult<EntityState,Entity> {
86 Ok(match self {
87 EntityState::Init => Next::empty(),
88 EntityState::MayBeEntity(amp_char) => Next::empty().with_event(amp_char.map(|c| ParserEvent::Char(c))),
89 EntityState::MayBeNumEntity(amp_char,hash_char) => {
90 Next::empty()
91 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
92 .with_event(hash_char.map(|c| ParserEvent::Char(c)))
93 },
94 EntityState::EntityNamed(ent) => ent.named_into_state()?,
95 EntityState::EntityNumber(ent) |
96 EntityState::EntityNumberX(ent) => ent.failed_into_state()?,
97 })
98 }
99 fn next_state(self, local_src: Local<SourceEvent>, _: &Self::Context) -> NextResult<EntityState,Entity> {
100 match self {
101 EntityState::Init => init(local_src),
102 EntityState::MayBeEntity(amp_char) => may_be_entity(amp_char,local_src),
103 EntityState::MayBeNumEntity(amp_char,hash_char) => may_be_num_entity(amp_char,hash_char,local_src),
104 EntityState::EntityNamed(ent) => entity_named(ent,local_src),
105 EntityState::EntityNumber(ent) => entity_number(ent,local_src),
106 EntityState::EntityNumberX(ent) => entity_number_x(ent,local_src),
107 }
108 }
109}
110
111fn init(local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
112 Ok(match *local_src.data() {
113 SourceEvent::Char(lc) => {
114 let local_char = local_src.local(lc);
115 match lc {
116 '&' => Next::empty()
117 .with_state(EntityState::MayBeEntity(local_char)),
118 _ => Next::empty()
119 .with_event(local_char.map(|c| ParserEvent::Char(c))),
120 }
121 },
122 SourceEvent::Breaker(b) => match b {
123 Breaker::None => Next::empty(),
124 _ => Next::empty()
125 .with_event(local_src.local(ParserEvent::Breaker(b))),
126 },
127 })
128}
129
130fn create_entity_event(entity: ReadEntity, replace: Instance) -> Result<Local<ParserEvent<Entity>>,Error> {
131 Local::from_segment(entity.begin,entity.current)
132 .map(|local| {
133 let mut v = String::with_capacity(entity.chars.len());
134 for c in entity.chars {
135 v.push(*c.data());
136 }
137 local.with_inner(ParserEvent::Parsed(Entity{ value: v, entity: replace }))
138 })
139}
140
141fn entity_number_x(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
142 Ok(match *local_src.data() {
143 SourceEvent::Char(lc) => {
144 let local_char = local_src.local(lc);
145 match lc {
146 '0' ..= '9' | 'a' ..= 'z' | 'A' ..= 'Z' => {
147 ent.current = local_char;
148 ent.content.push(*local_char.data());
149 ent.chars.push(local_char);
150 Next::empty().with_state(EntityState::EntityNumberX(ent))
151 },
152 ';' => {
153 ent.current = local_char;
154 ent.chars.push(local_char);
155 ent.number_x_into_state()?
156 },
157 '&'=> ent.failed_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
158 _ => ent.failed_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
159 }
160 },
161 SourceEvent::Breaker(b) => match b {
162 Breaker::None => Next::empty().with_state(EntityState::EntityNumberX(ent)),
163 _ => ent.failed_into_state()?
164 .with_event(local_src.local(ParserEvent::Breaker(b))),
165 },
166 })
167}
168
169fn entity_number(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
170 Ok(match *local_src.data() {
171 SourceEvent::Char(lc) => {
172 let local_char = local_src.local(lc);
173 match lc {
174 '0' ..= '9' => {
175 ent.current = local_char;
176 ent.content.push(*local_char.data());
177 ent.chars.push(local_char);
178 Next::empty().with_state(EntityState::EntityNumber(ent))
179 },
180 ';' => {
181 ent.current = local_char;
182 ent.chars.push(local_char);
183 ent.number_into_state()?
184 },
185 '&'=> ent.failed_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
186 _ => ent.failed_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
187 }
188 },
189 SourceEvent::Breaker(b) => match b {
190 Breaker::None => Next::empty().with_state(EntityState::EntityNumber(ent)),
191 _ => ent.failed_into_state()?
192 .with_event(local_src.local(ParserEvent::Breaker(b))),
193 },
194 })
195}
196
197fn may_be_num_entity(amp_char: Local<char>, hash_char: Local<char>, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
198 Ok(match *local_src.data() {
199 SourceEvent::Char(lc) => {
200 let local_char = local_src.local(lc);
201 match lc {
202 'x' => {
203 let ent = ReadEntity {
204 begin: amp_char,
205 current: local_char,
206 content: String::new(),
207 chars: vec![amp_char,hash_char,local_char],
208 };
209 Next::empty()
210 .with_state(EntityState::EntityNumberX(ent))
211 },
212 '0' ..= '9' => {
213 let ent = ReadEntity {
214 begin: amp_char,
215 current: local_char,
216 content: { let mut s = String::new(); s.push(*local_char.data()); s },
217 chars: vec![amp_char,hash_char,local_char],
218 };
219 Next::empty()
220 .with_state(EntityState::EntityNumber(ent))
221 },
222 '&'=> Next::empty()
223 .with_state(EntityState::MayBeEntity(local_char))
224 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
225 .with_event(hash_char.map(|c| ParserEvent::Char(c))),
226 _ => Next::empty()
227 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
228 .with_event(hash_char.map(|c| ParserEvent::Char(c)))
229 .with_event(local_char.map(|c| ParserEvent::Char(c))),
230 }
231 },
232 SourceEvent::Breaker(b) => match b {
233 Breaker::None => Next::empty().with_state(EntityState::MayBeNumEntity(amp_char,hash_char)),
234 _ => Next::empty()
235 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
236 .with_event(hash_char.map(|c| ParserEvent::Char(c)))
237 .with_event(local_src.local(ParserEvent::Breaker(b))),
238 },
239 })
240}
241
242fn entity_named(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
243 Ok(match *local_src.data() {
244 SourceEvent::Char(lc) => {
245 let local_char = local_src.local(lc);
246 match lc {
247 ':' | '_' | 'A' ..= 'Z' | 'a' ..= 'z' | '\u{C0}' ..= '\u{D6}' | '\u{D8}' ..= '\u{F6}' | '\u{F8}' ..= '\u{2FF}' |
248 '\u{370}' ..= '\u{37D}' | '\u{37F}' ..= '\u{1FFF}' | '\u{200C}' ..= '\u{200D}' | '\u{2070}' ..= '\u{218F}' |
249 '\u{2C00}' ..= '\u{2FEF}' | '\u{3001}' ..= '\u{D7FF}' | '\u{F900}' ..= '\u{FDCF}' | '\u{FDF0}' ..= '\u{FFFD}' |
250 '\u{10000}' ..= '\u{EFFFF}' |
251 '-' | '.' | '0' ..= '9' | '\u{B7}' | '\u{0300}' ..= '\u{036F}' | '\u{203F}' ..= '\u{2040}' => {
252 ent.current = local_char;
253 ent.content.push(*local_char.data());
254 ent.chars.push(local_char);
255 Next::empty().with_state(EntityState::EntityNamed(ent))
256 },
257 ';' => {
258 ent.current = local_char;
259 ent.content.push(*local_char.data());
260 ent.chars.push(local_char);
261 ent.named_into_state()?
262 },
263 '&'=> ent.named_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
264 _ => ent.named_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
265 }
266 },
267 SourceEvent::Breaker(b) => match b {
268 Breaker::None => Next::empty().with_state(EntityState::EntityNamed(ent)),
269 _ => ent.named_into_state()?
270 .with_event(local_src.local(ParserEvent::Breaker(b))),
271 },
272 })
273}
274
275fn may_be_entity(amp_char: Local<char>, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
276 Ok(match *local_src.data() {
277 SourceEvent::Char(lc) => {
278 let local_char = local_src.local(lc);
279 match lc {
280 '#' => Next::empty().with_state(EntityState::MayBeNumEntity(amp_char,local_char)),
281 ':' | '_' | 'A' ..= 'Z' | 'a' ..= 'z' | '\u{C0}' ..= '\u{D6}' | '\u{D8}' ..= '\u{F6}' | '\u{F8}' ..= '\u{2FF}' |
282 '\u{370}' ..= '\u{37D}' | '\u{37F}' ..= '\u{1FFF}' | '\u{200C}' ..= '\u{200D}' | '\u{2070}' ..= '\u{218F}' |
283 '\u{2C00}' ..= '\u{2FEF}' | '\u{3001}' ..= '\u{D7FF}' | '\u{F900}' ..= '\u{FDCF}' | '\u{FDF0}' ..= '\u{FFFD}' |
284 '\u{10000}' ..= '\u{EFFFF}' => {
285 let ent = ReadEntity {
286 begin: amp_char,
287 current: local_char,
288 content: {
289 let mut s = String::new();
290 s.push(*amp_char.data());
291 s.push(*local_char.data());
292 s
293 },
294 chars: vec![amp_char,local_char],
295 };
296 Next::empty().with_state(EntityState::EntityNamed(ent))
297 }
298 '&' => Next::empty()
299 .with_state(EntityState::MayBeEntity(local_char))
300 .with_event(amp_char.map(|c| ParserEvent::Char(c))),
301 _ => Next::empty()
302 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
303 .with_event(local_char.map(|c| ParserEvent::Char(c))),
304 }
305 },
306 SourceEvent::Breaker(b) => match b {
307 Breaker::None => Next::empty().with_state(EntityState::MayBeEntity(amp_char)),
308 _ => Next::empty()
309 .with_event(amp_char.map(|c| ParserEvent::Char(c)))
310 .with_event(local_src.local(ParserEvent::Breaker(b))),
311 },
312 })
313}