text_parsing/entities/
state.rs

1
2use super::{
3    entities::{Entity,Instance,ENTITIES},     
4};
5use crate::{
6    Error, Local, ParserEvent,
7    NextResult, Next, StateMachine,
8    SourceEvent, Breaker,
9};
10
11
12#[derive(Debug)]
13pub(in super) enum EntityState {
14    // Entities
15    Init,
16    MayBeEntity(Local<char>),
17    MayBeNumEntity(Local<char>,Local<char>),
18    EntityNamed(ReadEntity),
19    EntityNumber(ReadEntity),
20    EntityNumberX(ReadEntity),
21}
22impl Default for EntityState {
23    fn default() -> EntityState {
24        EntityState::Init
25    }
26}
27
28#[derive(Debug)]
29pub(in super) struct ReadEntity {
30    begin: Local<char>,
31    current: Local<char>,
32    content: String,
33    chars: Vec<Local<char>>,
34}
35impl ReadEntity {
36    fn named_into_state(self) -> NextResult<EntityState,Entity> {
37        let mut ns = Next::empty();
38        match ENTITIES.get(&self.content) {
39            Some(e) => ns = ns.with_event(create_entity_event(self,e)?),
40            None => for c in self.chars {
41                ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
42            },
43        }
44        Ok(ns)
45    }
46    fn number_into_state(self) -> NextResult<EntityState,Entity> {
47        let mut ns = Next::empty();
48        match match u32::from_str_radix(&self.content,10) {
49            Ok(u) => char::from_u32(u),
50            Err(_) => None,
51        } {
52            Some(e) => ns = ns.with_event(create_entity_event(self,Instance::Char(e))?),
53            None => for c in self.chars {
54                ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
55            },
56        }
57        Ok(ns)
58    }
59    fn number_x_into_state(self) -> NextResult<EntityState,Entity> {
60        let mut ns = Next::empty();
61        match match u32::from_str_radix(&self.content,16) {
62            Ok(u) => char::from_u32(u),
63            Err(_) => None,
64        } {
65            Some(e) => ns = ns.with_event(create_entity_event(self,Instance::Char(e))?),
66            None => for c in self.chars {
67                ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
68            },
69        }
70        Ok(ns)
71    }
72    fn failed_into_state(self) -> NextResult<EntityState,Entity> {
73        let mut ns = Next::empty();
74        for c in self.chars {
75            ns = ns.with_event(c.map(|c| ParserEvent::Char(c)));
76        }
77        Ok(ns)
78    }
79}
80
81impl StateMachine for EntityState {
82    type Context = ();
83    type Data = Entity;
84    
85    fn eof(self, _: &Self::Context) -> NextResult<EntityState,Entity> {        
86        Ok(match self {
87            EntityState::Init => Next::empty(),
88            EntityState::MayBeEntity(amp_char) => Next::empty().with_event(amp_char.map(|c| ParserEvent::Char(c))),
89            EntityState::MayBeNumEntity(amp_char,hash_char) => {
90                Next::empty()
91                    .with_event(amp_char.map(|c| ParserEvent::Char(c)))
92                    .with_event(hash_char.map(|c| ParserEvent::Char(c)))
93            },
94            EntityState::EntityNamed(ent) => ent.named_into_state()?,
95            EntityState::EntityNumber(ent) |
96            EntityState::EntityNumberX(ent) => ent.failed_into_state()?,
97        })
98    }
99    fn next_state(self, local_src: Local<SourceEvent>,  _: &Self::Context) -> NextResult<EntityState,Entity> {
100        match self {
101            EntityState::Init => init(local_src),
102            EntityState::MayBeEntity(amp_char) => may_be_entity(amp_char,local_src),
103            EntityState::MayBeNumEntity(amp_char,hash_char) => may_be_num_entity(amp_char,hash_char,local_src),
104            EntityState::EntityNamed(ent) => entity_named(ent,local_src),
105            EntityState::EntityNumber(ent) => entity_number(ent,local_src),
106            EntityState::EntityNumberX(ent) => entity_number_x(ent,local_src),
107        }
108    }
109}
110
111fn init(local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
112    Ok(match *local_src.data() {
113        SourceEvent::Char(lc) => {
114            let local_char = local_src.local(lc);
115            match lc {
116                '&' => Next::empty()
117                    .with_state(EntityState::MayBeEntity(local_char)),
118                _ => Next::empty()
119                    .with_event(local_char.map(|c| ParserEvent::Char(c))),
120            }
121        },
122        SourceEvent::Breaker(b) => match b {
123            Breaker::None => Next::empty(),
124            _ => Next::empty()
125                .with_event(local_src.local(ParserEvent::Breaker(b))),
126        },
127    })
128}
129
130fn create_entity_event(entity: ReadEntity, replace: Instance) -> Result<Local<ParserEvent<Entity>>,Error> {    
131    Local::from_segment(entity.begin,entity.current)
132        .map(|local| {
133            let mut v = String::with_capacity(entity.chars.len());
134            for c in entity.chars {
135                v.push(*c.data());
136            }
137            local.with_inner(ParserEvent::Parsed(Entity{ value: v, entity: replace }))
138        })
139}
140
141fn entity_number_x(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
142    Ok(match *local_src.data() {
143        SourceEvent::Char(lc) => {
144            let local_char = local_src.local(lc);
145            match lc {
146                '0' ..= '9' | 'a' ..= 'z' | 'A' ..= 'Z' => {
147                    ent.current = local_char;
148                    ent.content.push(*local_char.data());
149                    ent.chars.push(local_char);
150                    Next::empty().with_state(EntityState::EntityNumberX(ent))
151                },
152                ';' => {
153                    ent.current = local_char;
154                    ent.chars.push(local_char);
155                    ent.number_x_into_state()?
156                },
157                '&'=> ent.failed_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
158                _ => ent.failed_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
159            }
160        },
161        SourceEvent::Breaker(b) => match b {
162            Breaker::None => Next::empty().with_state(EntityState::EntityNumberX(ent)),
163            _ => ent.failed_into_state()?
164                .with_event(local_src.local(ParserEvent::Breaker(b))),
165        },
166    })
167}
168
169fn entity_number(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
170    Ok(match *local_src.data() {
171        SourceEvent::Char(lc) => {
172            let local_char = local_src.local(lc);
173            match lc {
174                '0' ..= '9' => {
175                    ent.current = local_char;
176                    ent.content.push(*local_char.data());
177                    ent.chars.push(local_char);
178                    Next::empty().with_state(EntityState::EntityNumber(ent))
179                },
180                ';' => {
181                    ent.current = local_char;
182                    ent.chars.push(local_char);
183                    ent.number_into_state()?                
184                },
185                '&'=> ent.failed_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
186                _ => ent.failed_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
187            }
188        },
189        SourceEvent::Breaker(b) => match b {
190            Breaker::None => Next::empty().with_state(EntityState::EntityNumber(ent)),
191            _ => ent.failed_into_state()?
192                .with_event(local_src.local(ParserEvent::Breaker(b))),
193        },
194    })
195}
196
197fn may_be_num_entity(amp_char: Local<char>, hash_char: Local<char>, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
198    Ok(match *local_src.data() {
199        SourceEvent::Char(lc) => {
200            let local_char = local_src.local(lc);
201            match lc {
202                'x' => {
203                    let ent = ReadEntity {
204                        begin: amp_char,
205                        current: local_char,
206                        content: String::new(),
207                        chars: vec![amp_char,hash_char,local_char],
208                    };
209                    Next::empty()
210                        .with_state(EntityState::EntityNumberX(ent))
211                },
212                '0' ..= '9' => {
213                    let ent = ReadEntity {
214                        begin: amp_char,
215                        current: local_char,
216                        content: { let mut s = String::new(); s.push(*local_char.data()); s },
217                        chars: vec![amp_char,hash_char,local_char],
218                    };
219                    Next::empty()
220                        .with_state(EntityState::EntityNumber(ent))
221                },
222                '&'=> Next::empty()
223                    .with_state(EntityState::MayBeEntity(local_char))
224                    .with_event(amp_char.map(|c| ParserEvent::Char(c)))
225                    .with_event(hash_char.map(|c| ParserEvent::Char(c))),
226                _ => Next::empty()
227                    .with_event(amp_char.map(|c| ParserEvent::Char(c)))
228                    .with_event(hash_char.map(|c| ParserEvent::Char(c)))
229                    .with_event(local_char.map(|c| ParserEvent::Char(c))),
230            }
231        },
232        SourceEvent::Breaker(b) => match b {
233            Breaker::None => Next::empty().with_state(EntityState::MayBeNumEntity(amp_char,hash_char)),
234            _ => Next::empty()
235                .with_event(amp_char.map(|c| ParserEvent::Char(c)))
236                .with_event(hash_char.map(|c| ParserEvent::Char(c)))
237                .with_event(local_src.local(ParserEvent::Breaker(b))),
238        },
239    })
240}
241
242fn entity_named(mut ent: ReadEntity, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
243    Ok(match *local_src.data() {
244        SourceEvent::Char(lc) => {
245            let local_char = local_src.local(lc);
246            match lc {
247                ':' | '_' | 'A' ..= 'Z' | 'a' ..= 'z' | '\u{C0}' ..= '\u{D6}' | '\u{D8}' ..= '\u{F6}' | '\u{F8}' ..= '\u{2FF}' |
248                '\u{370}' ..= '\u{37D}' | '\u{37F}' ..= '\u{1FFF}' | '\u{200C}' ..= '\u{200D}' | '\u{2070}' ..= '\u{218F}' |
249                '\u{2C00}' ..= '\u{2FEF}' | '\u{3001}' ..= '\u{D7FF}' | '\u{F900}' ..= '\u{FDCF}' | '\u{FDF0}' ..= '\u{FFFD}' |
250                '\u{10000}' ..= '\u{EFFFF}' |
251                '-' | '.' | '0' ..= '9' | '\u{B7}' | '\u{0300}' ..= '\u{036F}' | '\u{203F}' ..= '\u{2040}' => {
252                    ent.current = local_char;
253                    ent.content.push(*local_char.data());
254                    ent.chars.push(local_char);
255                    Next::empty().with_state(EntityState::EntityNamed(ent))
256                },
257                ';' => {
258                    ent.current = local_char;
259                    ent.content.push(*local_char.data());
260                    ent.chars.push(local_char);
261                    ent.named_into_state()?                
262                },
263                '&'=> ent.named_into_state()?.with_state(EntityState::MayBeEntity(local_char)),
264                _ => ent.named_into_state()?.with_event(local_char.map(|c| ParserEvent::Char(c))),
265            }
266        },
267        SourceEvent::Breaker(b) => match b {
268            Breaker::None => Next::empty().with_state(EntityState::EntityNamed(ent)),
269            _ => ent.named_into_state()?
270                .with_event(local_src.local(ParserEvent::Breaker(b))),
271        },
272    })
273}
274
275fn may_be_entity(amp_char: Local<char>, local_src: Local<SourceEvent>) -> NextResult<EntityState,Entity> {
276    Ok(match *local_src.data() {
277        SourceEvent::Char(lc) => {
278            let local_char = local_src.local(lc);
279            match lc {
280                '#' => Next::empty().with_state(EntityState::MayBeNumEntity(amp_char,local_char)),
281                ':' | '_' | 'A' ..= 'Z' | 'a' ..= 'z' | '\u{C0}' ..= '\u{D6}' | '\u{D8}' ..= '\u{F6}' | '\u{F8}' ..= '\u{2FF}' |
282                '\u{370}' ..= '\u{37D}' | '\u{37F}' ..= '\u{1FFF}' | '\u{200C}' ..= '\u{200D}' | '\u{2070}' ..= '\u{218F}' |
283                '\u{2C00}' ..= '\u{2FEF}' | '\u{3001}' ..= '\u{D7FF}' | '\u{F900}' ..= '\u{FDCF}' | '\u{FDF0}' ..= '\u{FFFD}' |
284                '\u{10000}' ..= '\u{EFFFF}' => {
285                    let ent = ReadEntity {
286                        begin: amp_char,
287                        current: local_char,
288                        content: {
289                            let mut s = String::new();
290                            s.push(*amp_char.data());
291                            s.push(*local_char.data());
292                            s
293                        },
294                        chars: vec![amp_char,local_char],
295                    };
296                    Next::empty().with_state(EntityState::EntityNamed(ent))
297                }
298                '&' => Next::empty()
299                    .with_state(EntityState::MayBeEntity(local_char))
300                    .with_event(amp_char.map(|c| ParserEvent::Char(c))),
301                _ => Next::empty()
302                    .with_event(amp_char.map(|c| ParserEvent::Char(c)))
303                    .with_event(local_char.map(|c| ParserEvent::Char(c))),
304            }
305        },
306        SourceEvent::Breaker(b) => match b {
307            Breaker::None => Next::empty().with_state(EntityState::MayBeEntity(amp_char)),
308            _ => Next::empty()
309                .with_event(amp_char.map(|c| ParserEvent::Char(c)))
310                .with_event(local_src.local(ParserEvent::Breaker(b))),
311        },
312    })
313}