htmlstream/
attribute.rs

1use base::{Position, HTMLTagAttribute};
2
3
/// Scanner state for walking the attributes of an HTML tag one at a time.
///
/// The iterator works on the raw bytes of `html` and keeps a small state
/// machine in the boolean flags below; offsets are byte offsets into `html`.
#[derive(Debug)]
pub struct HTMLTagAttributeIterator<'a> {
    /// The attribute text being scanned.
    pub html: &'a str,

    /// `true` while the scanner is inside a quoted attribute value.
    is_quote_start: bool,
    /// `true` once the first non-blank byte of an attribute has been seen
    /// and until that attribute has been handed out.
    is_attribute_start: bool,
    /// `true` once the `=` of the current attribute has been seen, i.e. the
    /// name is complete and the value is being scanned.
    is_get_attribute_name: bool,

    /// The quote byte (`'` or `"`) that opened the current quoted value.
    quote_char: u8,
    /// The byte immediately before the one currently being examined.
    last_char: u8,
    /// Byte offset at which the current attribute starts; after an item is
    /// finished it is parked at `current_index - 1`.
    last_index: usize,
    /// Byte offset of the next byte to examine.
    current_index: usize,
    /// Byte offset just past the `=` of the current attribute.
    value_start_index: usize,

    /// `html` viewed as bytes, cached for indexed access.
    html_bytes: &'a [u8],
    /// Length of `html` in bytes, cached for the loop bound.
    html_len: usize,
}
21
22impl<'a> HTMLTagAttributeIterator<'a> {
23    fn new(html: &'a str) -> HTMLTagAttributeIterator<'a> {
24        HTMLTagAttributeIterator {
25            html: html,
26            is_quote_start: false,
27            is_attribute_start: false,
28            is_get_attribute_name: false,
29            quote_char: 0,
30            last_char: 0,
31            last_index: 0,
32            current_index: 0,
33            value_start_index: 0,
34            html_bytes: html.as_bytes(),
35            html_len: html.len()
36        }
37    }
38
39    #[inline]
40    fn finished_item(&mut self) {
41        self.is_attribute_start = false;
42        self.is_quote_start = false;
43        self.is_get_attribute_name = false;
44        self.last_index = self.current_index - 1;
45    }
46}
47
48impl<'a> Iterator for HTMLTagAttributeIterator<'a> {
49    type Item = (Position, HTMLTagAttribute);
50
51    fn next(&mut self) -> Option<(Position, HTMLTagAttribute)> {
52        while self.current_index < self.html_len {
53            let c = self.html_bytes[self.current_index];
54            if self.current_index > 0 {
55                self.last_char = self.html_bytes[self.current_index - 1];;
56            }
57            self.current_index += 1;
58
59            if self.is_attribute_start {
60
61                if self.is_get_attribute_name {
62
63                    if self.is_quote_start {
64                        if c == self.quote_char {
65                            // only when match the same `quote` char
66                            if c == self.quote_char {
67                                let name = &self.html[self.last_index..(self.value_start_index - 1)];
68                                let value = &self.html[(self.value_start_index + 1)..(self.current_index - 1)];
69                                let position = Position { start: self.last_index, end: self.current_index };
70                                let attribute = HTMLTagAttribute {
71                                    name: name.to_string(),
72                                    value: value.to_string(),
73                                };
74                                self.finished_item();
75                                return Some((position, attribute));
76                            }
77                        } else {
78                            continue;
79                        }
80                    }
81
82                    // quote start
83                    if b'\'' == c || b'"' == c {
84                        // only when the last char is `equal`
85                        if b'=' == self.last_char {
86                            self.is_quote_start = true;
87                            self.quote_char = c;
88                        }
89                        continue;
90                    }
91
92                    // only when match a `blank` char
93                    if c <= b' ' {
94                        let name = &self.html[self.last_index..(self.value_start_index - 1)];
95                        let value = &self.html[(self.value_start_index)..(self.current_index - 1)];
96                        let position = Position { start: self.last_index, end: self.current_index - 1 };
97                        let attribute = HTMLTagAttribute {
98                            name: name.to_string(),
99                            value: value.to_string(),
100                        };
101                        self.finished_item();
102                        return Some((position, attribute));
103                    }
104
105                } else {
106
107                    // only when match an `equal` char, start the attribute value
108                    if b'=' == c {
109                        self.value_start_index = self.current_index;
110                        self.is_get_attribute_name = true;
111                        continue;
112                    }
113
114                    // only when match an `blank` char, stop current attribute
115                    if c <= b' ' {
116                        let name = &self.html[self.last_index..(self.current_index - 1)];
117                        let position = Position { start: self.last_index, end: self.current_index - 1 };
118                        let attribute = HTMLTagAttribute {
119                            name: name.to_string(),
120                            value: "".to_string(),
121                        };
122                        self.finished_item();
123                        return Some((position, attribute));
124                    }
125
126                }
127
128            } else {
129
130                // ignore `blank` char
131                if c <= b' ' {
132                    continue;
133                }
134
135                self.is_attribute_start = true;
136                self.is_get_attribute_name = false;
137                self.is_quote_start = false;
138                self.last_index = self.current_index - 1;
139
140            }
141        }
142
143        // the rest text
144        if self.current_index > 1 && self.last_index < self.current_index - 1 {
145            let name = &self.html[self.last_index..];
146            let position = Position { start: self.last_index, end: self.current_index };
147            let attribute = HTMLTagAttribute {
148                name: name.to_string(),
149                value: "".to_string()
150            };
151            self.finished_item();
152            return Some((position, attribute));
153        }
154
155        return None;
156    }
157}
158
159/// Return a HTMLTagAttribute Iterator
160pub fn attr_iter(html: &str) -> HTMLTagAttributeIterator {
161    HTMLTagAttributeIterator::new(html)
162}