1use crate::*;
2use std::collections::BTreeMap;
3
4pub fn html(w: &mut Writer, source: &[u8]) {
6 let mut p = Parser::new(source);
7 p.read_token();
8 html_inner(w, &mut p, b"");
9}
10
/// Kind of the token most recently produced by the parser.
///
/// The enum is fieldless, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Token {
    /// A run of non-whitespace bytes outside any tag.
    Text,
    /// A start or end tag.
    Tag,
    /// Whitespace between text and tags.
    WhiteSpace,
    /// End of the input.
    Eof,
}
18
/// Tokenizer state over a borrowed HTML byte buffer.
struct Parser<'a> {
    /// Input bytes being tokenized.
    source: &'a [u8],
    /// Index into `source` of the next byte to be read.
    position: usize,
    /// Start (inclusive) of the current token's value within `source`.
    token_start: usize,
    /// End (exclusive) of the current token's value within `source`.
    token_end: usize,
    /// Set when the current tag token began with `</`.
    end_tag: bool,
    /// Kind of the most recently read token.
    token: Token,
    /// Attribute name to value slices recorded while reading tag attributes.
    attr: BTreeMap<&'a [u8], &'a [u8]>,
}
28
29impl<'a> Parser<'a> {
30 fn new(source: &'a [u8]) -> Self {
31 Self {
32 source,
33 position: 0,
34 token_start: 0,
35 token_end: 0,
36 end_tag: false,
37 token: Token::Eof,
38 attr: BTreeMap::new(),
39 }
40 }
41
42 fn tvalue(&self) -> &'a [u8] {
43 &self.source[self.token_start..self.token_end]
44 }
45
46 fn avalue(&self, name: &'a [u8]) -> Option<&&'a [u8]> {
48 self.attr.get(name)
49 }
50
51 fn aint(&self, name: &'a [u8]) -> Option<Px> {
53 if let Some(s) = self.avalue(name)
54 && let Ok(x) = tos(s).parse::<Px>()
55 {
56 return Some(x);
57 }
58 None
59 }
60
61 fn next(&mut self) -> u8 {
62 if self.position == self.source.len() {
63 0
64 } else {
65 let c = self.source[self.position];
66 self.position += 1;
67 c
68 }
69 }
70
71 fn next_non_space(&mut self) -> u8 {
72 loop {
73 let c = self.next();
74 if c != b' ' {
75 return c;
76 }
77 }
78 }
79
80 fn read_tag_attributes(&mut self) {
81 loop {
83 let mut c = self.next_non_space();
84 let attr_name_start = self.position - 1;
85 while c != b'=' && c != b' ' && c != b'>' && c != 0 {
86 c = self.next();
87 }
88 if c == b'>' {
89 return;
90 }
91 let attr_name = &self.source[attr_name_start..self.position - 1];
92 if c == b' ' {
93 c = self.next_non_space();
94 }
95 if c != b'=' {
96 return;
97 }
98 c = self.next_non_space();
99 let start = self.position - 1;
100 let attr = if c == b'"' {
101 c = self.next();
103 while c != b'"' && c != 0 {
104 c = self.next();
105 }
106 if c != b'"' {
107 return;
108 }
109 &self.source[start + 1..self.position - 1]
110 } else {
111 while c != b' ' && c != b'>' && c != 0 {
113 c = self.next();
114 }
115 &self.source[start..self.position - 1]
116 };
117 self.attr.insert(attr_name, attr);
118 if c == b'>' {
119 return;
120 }
121 }
122 }
123
124 fn read_token(&mut self) {
125 let c = self.next();
126 if c == 0 {
127 self.token = Token::Eof;
128 } else if c == b' ' || c == b'\n' {
129 self.token = Token::WhiteSpace;
130 loop {
131 let c = self.next();
132 if c != b' ' || c != b'\n' {
133 if c != 0 {
134 self.position -= 1;
135 }
136 break;
137 }
138 }
139 } else if c == b'<' {
140 self.token = Token::Tag;
142 self.token_start = self.position;
143 self.end_tag = false;
144 let mut c = self.next();
145 if c == b'/' {
146 self.end_tag = true;
147 self.token_start = self.position;
148 c = self.next();
149 }
150 loop {
151 if c == b' ' {
153 self.token_end = self.position - 1;
154 self.read_tag_attributes();
155 break;
156 } else if c == b'>' {
157 self.token_end = self.position - 1;
158 break; } else if c == 0 {
160 self.token = Token::Eof; return;
162 } else {
163 c = self.next();
164 }
165 }
166 } else {
167 self.token = Token::Text;
168 self.token_start = self.position - 1;
169 let mut c = self.next();
170 loop {
171 if c == b'<' || c == b' ' || c == b'\n' {
172 self.position -= 1;
173 self.token_end = self.position;
174 break;
175 } else if c == 0 {
176 self.token_end = self.position;
177 break;
178 }
179 c = self.next();
180 }
181 }
182 }
183}
184
185fn html_inner(w: &mut Writer, p: &mut Parser, endtag: &[u8]) {
186 loop {
187 match p.token {
188 Token::Eof => {
189 return;
190 }
191 Token::WhiteSpace => {
192 w.space();
193 p.read_token();
194 }
195 Token::Text => {
196 let s = tos(p.tvalue());
197 let s = &html_escape::decode_html_entities(s);
198 w.text(s);
199 p.read_token();
200 }
201 Token::Tag => {
202 let tag = p.tvalue();
203 if p.end_tag {
204 if tag == endtag {
205 p.read_token();
206 }
207 return;
208 } else if tag == b"p" && tag == endtag {
209 return;
210 }
211 p.read_token();
212 if tag == b"br" || tag == b"br/" {
213 w.output_line();
214 } else if tag == b"img" {
215 if let Some(src) = p.avalue(b"src") {
216 let width = p.aint(b"width");
217 let height = p.aint(b"height");
218 w.image(tos(src), width, height);
219 }
220 } else {
221 let save_mode = w.mode;
222 let save_font = w.cur_font;
223 let save_font_size = w.font_size;
224 let mut save: Px = 0;
225 match tag {
226 b"p" => w.output_line(),
227 b"h1" => {
228 w.font_size = 14;
229 w.output_line();
230 save = if w.center { 1 } else { 0 };
231 w.center = true;
232 }
233 b"b" => w.cur_font |= 1,
234 b"i" => w.cur_font |= 2,
235 b"title" => w.mode = Mode::Title,
236 b"html" | b"head" => w.mode = Mode::Head,
237 b"body" => w.mode = Mode::Normal,
238 b"sup" => {
239 save = w.sup;
240 w.set_sup(w.font_size / 2);
241 }
242 b"sub" => {
243 save = w.sup;
244 w.set_sup(-w.font_size / 2);
245 }
246 _ => {}
247 }
248 html_inner(w, p, tag);
249 w.mode = save_mode;
250 w.font_size = save_font_size;
251 w.cur_font = save_font;
252 match tag {
253 b"sup" | b"sub" => w.set_sup(save),
254 b"h1" => {
255 w.output_line();
256 w.center = save == 1;
257 }
258 _ => {}
259 }
260 }
261 }
262 }
263 }
264}
265
/// Interprets `s` as UTF-8 text without panicking.
///
/// Returns all of `s` when it is valid UTF-8. Otherwise returns the longest
/// valid prefix, which may be empty; the bytes come from the document being
/// rendered, so malformed input must not abort the program.
fn tos(s: &[u8]) -> &str {
    match std::str::from_utf8(s) {
        Ok(text) => text,
        Err(err) => {
            // `valid_up_to` is the length of the prefix that did validate, so
            // the second conversion succeeds; the default only satisfies the
            // type checker.
            std::str::from_utf8(&s[..err.valid_up_to()]).unwrap_or_default()
        }
    }
}