/// A single lexical event produced while scanning markup.
///
/// Events are emitted in document order. Attributes of an element follow the
/// `StartElement` they belong to and precede any content or the matching
/// `EndElement`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Event {
    /// An opening tag; the payload is the tag name.
    StartElement(String),
    /// A closing tag (or the implicit close of a self-closing tag); the
    /// payload is the tag name.
    EndElement(String),
    /// Character data found between tags.
    TextContent(String),
    /// An attribute as a `(key, value)` pair.
    Attribute(String, String),
}
8
9pub struct Lexer {
10 content: Vec<char>,
11 tag_stack: Vec<String>,
12 events: Vec<Event>,
13}
14
15impl Lexer {
16 pub fn new(content: String) -> Self {
17 Self {
18 content: content.chars().collect::<Vec<char>>(),
19 tag_stack: Vec::new(),
20 events: Vec::new(),
21 }
22 }
23
24 pub fn parse(&mut self) -> &[Event] {
25 while !self.content.is_empty() {
26 self.take_whitespaces();
27
28 if self.content.is_empty() {
29 break;
30 }
31
32 if self.content.len() > 1 && self.content[0] == '<' && self.content[1] == '/' {
38 self.take_end_element();
39 }
40
41 if !self.content.is_empty() && self.content[0] == '<' {
43 self.take_start_element();
44 }
45
46 if !self.content.is_empty() {
48 self.take_text_content();
49 }
50 }
51
52 &self.events
53 }
54
55 fn take_end_element(&mut self) {
57 let tag_name = self.take_tag_name(2);
58 self.events.push(Event::EndElement(tag_name.clone()));
59
60 if self.tag_stack.last() == Some(&tag_name) {
62 self.tag_stack.pop();
63 } else {
64 eprintln!("ERROR: Invalid closing tag `{tag_name}`.")
65 }
66
67 self.take_whitespaces();
68
69 if self.content[0] == '>' {
70 self.get_slice(0, 1);
71 } else {
72 eprintln!("ERROR: Invalid closing tag with extra args.");
73 }
74 }
75
76 fn take_start_element(&mut self) {
78 let tag_name = self.take_tag_name(1);
79 self.events.push(Event::StartElement(tag_name.clone()));
80 self.tag_stack.push(tag_name);
81
82 self.take_attributes();
83 self.take_whitespaces();
84
85 if self.content.len() > 1 && self.content[0] == '/' && self.content[1] == '>' {
87 self.get_slice(0, 2);
88
89 if let Some(last_tag) = self.tag_stack.pop() {
90 self.events.push(Event::EndElement(last_tag));
91 } else {
92 eprintln!("ERROR: there is no tag.");
93 }
94 }
95 else if self.content[0] == '>' {
97 self.get_slice(0, 1);
98 } else {
99 eprintln!("ERROR: expected `>` on start element.");
100 }
101 }
102
103 fn take_tag_name(&mut self, start: usize) -> String {
104 self.take_while_from(start, |x| x.is_alphabetic() || x.is_alphanumeric())
105 }
106
107 fn take_text_content(&mut self) {
108 let value = self.take_while(|x| x != '<');
109 let value = value.replace("\n", "");
110 let value = value.replace("\t", "");
111 let value = value.trim().to_string();
112
113 if value.is_empty() {
114 return;
115 }
116
117 self.events.push(Event::TextContent(value));
118 }
119
120 fn take_attributes(&mut self) {
121 while (self.content[0] != '>') && (self.content[0] != '/') {
124 self.take_attribute()
125 }
126 }
127
128 fn take_attribute(&mut self) {
130 self.take_whitespaces();
131
132 let key =
133 self.take_while(|x| x.is_alphabetic() || x.is_alphanumeric() || x == '-' || x == '_');
134
135 let value = self.take_attribute_value();
136
137 self.events.push(Event::Attribute(key, value));
138
139 self.take_whitespaces();
140 }
141
142 fn take_attribute_value(&mut self) -> String {
143 if self.content.is_empty() || self.content[0] != '=' {
144 return String::from("");
145 }
146
147 self.get_slice(0, 1);
148
149 let mut qoute_count = 0;
150
151 let value = self.take_while(|x| {
153 if x == '"' {
155 qoute_count += 1;
156 return if qoute_count == 2 { false } else { true };
157 }
158
159 if qoute_count == 1 {
161 return true;
162 }
163
164 x != ' ' || x != '>' || x != '/'
165 });
166
167 if qoute_count == 0 {
168 return value;
169 }
170
171 self.get_slice(0, 1);
174
175 value[1..].to_string()
176 }
177
178 fn take_whitespaces(&mut self) {
179 self.take_while(|x| x.is_whitespace());
180 }
181
182 fn take_while<F>(&mut self, predict: F) -> String
183 where
184 F: FnMut(char) -> bool,
185 {
186 self.take_while_from(0, predict)
187 }
188
189 fn take_while_from<F>(&mut self, start: usize, mut predict: F) -> String
190 where
191 F: FnMut(char) -> bool,
192 {
193 let mut i = start;
194
195 while self.content.len() > i && predict(self.content[i]) {
196 i += 1;
197 }
198
199 self.get_slice(start, i)
200 }
201
202 fn get_slice(&mut self, from: usize, to: usize) -> String {
203 let value = self.content[from..to].iter().collect::<String>();
204 self.content = self.content[to..].to_vec();
205
206 value
207 }
208}