bbclash/
bbcode_tokenizer.rs1use super::Instruction;
2
/// Reader state of the tokenizer; selects which handler receives the next
/// input character.
#[derive(Debug, PartialEq)]
enum ReadMode {
    /// Ordinary text; this is the starting state.
    Text,
    /// Entered from text mode after a backslash; lasts for one character.
    Escape,
    /// Entered from text mode after `[`; collects the tag name.
    Tag,
    /// Entered from tag mode after `=`; collects the tag's primary argument.
    TagPrimaryArg,
    /// Entered from text mode after a newline or carriage return.
    Parabreak,
    /// Entered from paragraph-break mode after a further newline or carriage
    /// return.
    Scenebreak,
}
13impl Default for ReadMode {
14 fn default() -> Self {ReadMode::Text}
15}
16
17#[derive(Default)]
19pub struct BBCodeTokenizer {
20 mode: ReadMode,
21 current_instruction: Instruction,
22 instructions: Vec<Instruction>
23}
24impl BBCodeTokenizer {
25 pub fn new() -> Self {
27 Default::default()
28 }
29 pub fn tokenize(&mut self, bbcode: &str) -> &Vec<Instruction> {
31 let bbcode_chars = bbcode.chars();
32 for character in bbcode_chars {
33 match &self.mode {
34 ReadMode::Text => {self.parse_text(character);},
35 ReadMode::Escape => {self.parse_escape(character);},
36 ReadMode::Tag => {self.parse_tag(character);},
37 ReadMode::TagPrimaryArg => {self.parse_tag_primary_arg(character);},
38 ReadMode::Parabreak => {self.parse_parabreak(character);},
39 ReadMode::Scenebreak => {self.parse_scenebreak(character);},
40 }
41 }
42 self.set_cur_instruction();
43 &self.instructions
44 }
45 fn parse_text(&mut self, character: char) {
47 match character {
48 '\\' => {
49 self.mode = ReadMode::Escape
50 },
51 '[' => {
52 self.set_cur_instruction();
53 self.mode = ReadMode::Tag;
54 },
55 '\n' | '\r' => {
56 self.set_cur_instruction();
57 self.mode = ReadMode::Parabreak;
58 },
59 '>' | '<' | '&' | '"' | '\'' => {
60 let san_char = self.sanitize(character);
61 match self.current_instruction {
62 Instruction::Text(ref mut contents) => {
63 contents.push_str(&san_char);
64 },
65 _ => {
66 self.current_instruction = Instruction::Text(san_char);
67 }
68 }
69 },
70 _ => {
71 match self.current_instruction {
72 Instruction::Text(ref mut contents) => {
73 contents.push(character);
74 },
75 _ => {
76 self.current_instruction = Instruction::Text(character.to_string());
77 }
78 }
79 }
80 }
81 }
82 fn parse_parabreak(&mut self, character: char) {
84 match character {
85 '\t' => {
86 self.set_new_instruction(Instruction::Parabreak("\n\t".to_string()));
87 self.mode = ReadMode::Text;
88 },
89 '\n' | '\r' => {
90 self.mode = ReadMode::Scenebreak;
91 },
92 ' ' => {},
93 _ => {
94 self.set_new_instruction(Instruction::Linebreak);
95 self.mode = ReadMode::Text;
96 self.parse_text(character);
97 }
98 }
99
100 }
101 fn parse_scenebreak(&mut self, character: char) {
103 match character {
104 '\n' | '\r' => {
105 self.set_new_instruction(Instruction::Scenebreak);
106 self.mode = ReadMode::Text;
107 },
108 ' ' => {},
109 _ => {
110 self.set_new_instruction(Instruction::Parabreak("\n\n".to_string()));
111 self.mode = ReadMode::Text;
112 self.parse_text(character);
113 }
114 }
115
116 }
117 fn parse_escape(&mut self, character: char) {
119 self.mode = ReadMode::Text;
120 match character {
121 '>' | '<' | '&' | '"' | '\'' | '\\' => {
122 let san_char = self.sanitize(character);
123 match self.current_instruction {
124 Instruction::Tag(ref mut contents, _) => {
125 contents.push_str(&san_char);
126 },
127 _ => {
128 self.current_instruction = Instruction::Text(san_char);
129 }
130 }
131 },
132 _ => {
133 match self.current_instruction {
134 Instruction::Text(ref mut contents) => {
135 contents.push(character);
136 },
137 _ => {
138 self.current_instruction = Instruction::Text(character.to_string());
139 }
140 }
141 }
142 }
143 }
144 fn parse_tag(&mut self, character: char) {
146 match character {
147 ']' => {
148 self.set_cur_instruction();
149 self.mode = ReadMode::Text;
150 },
151 '=' => {
152 self.mode = ReadMode::TagPrimaryArg;
153 },
154 '>' | '<' | '&' | '"' | '\'' | '\\' => {
155 let san_char = self.sanitize(character);
156 match self.current_instruction {
157 Instruction::Tag(ref mut contents, _) => {
158 contents.push_str(&san_char);
159 },
160 _ => {
161 self.current_instruction = Instruction::Tag(san_char, None);
162 }
163 }
164 },
165 _ => {
166 match self.current_instruction {
167 Instruction::Tag(ref mut contents, _) => {
168 contents.push(character);
169 },
170 _ => {
171 self.current_instruction = Instruction::Tag(character.to_string(), None);
172 }
173 }
174 }
175 }
176 }
177 fn parse_tag_primary_arg(&mut self, character: char) {
179 match character {
180 ']' => {
181 self.set_cur_instruction();
182 self.mode = ReadMode::Text;
183 },
184 '>' | '<' | '&' | '"' | '\'' | '\\' => {
185 let san_char = self.sanitize(character);
186 match self.current_instruction {
187 Instruction::Tag(ref mut contents, ref mut args) => {
188 match args {
189 Some(ref mut primarg) => {
190 primarg.push_str(&san_char);
191 },
192 None => {
193 self.current_instruction = Instruction::Tag((*contents).to_string(), Some(san_char));
194 }
195 }
196 },
197 _ => {
198 unreachable!();
199 }
200 }
201 },
202 _ => {
203 match self.current_instruction {
204 Instruction::Tag(ref mut contents, ref mut args) => {
205 match args {
206 Some(ref mut primarg) => {
207 primarg.push(character);
208 },
209 None => {
210 self.current_instruction = Instruction::Tag((*contents).to_string(), Some(character.to_string()));
211 }
212 }
213 },
214 _ => {
215 unreachable!();
216 }
217 }
218 }
219 }
220 }
221 fn set_cur_instruction(&mut self) {
223 if self.current_instruction != Instruction::Null {
224 self.instructions.push(self.current_instruction.clone());
225 self.current_instruction = Instruction::Null;
226 }
227 }
228 fn set_new_instruction(&mut self, instruction: Instruction) {
230 self.instructions.push(instruction);
231 self.current_instruction = Instruction::Null;
232 }
233 fn sanitize(&mut self, character: char) -> String {
235 match character {
236 '<' => "<",
237 '>' => ">",
238 '&' => "&",
239 '"' => """,
240 '\'' => "'",
241 '\\' => "/",
242 _ => unreachable!()
243 }.to_string()
244 }
245}