bbclash/
bbcode_tokenizer.rs

1use super::Instruction;
2
/// Tokenizer modes.
///
/// The mode decides how the tokenizer interprets the next character it reads.
/// The enum carries no data, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReadMode {
	/// Reading plain text; this is the mode a tokenizer starts in.
	Text,
	/// Reading the single character that follows a backslash.
	Escape,
	/// Reading the name of a tag, after an opening `[`.
	Tag,
	/// Reading the primary argument of a tag, after the `=` inside a tag.
	TagPrimaryArg,
	/// One line break has been read; deciding which kind of break it is.
	Parabreak,
	/// Two line breaks have been read; a third one makes a scene break.
	Scenebreak,
}
13impl Default for ReadMode {
14    fn default() -> Self {ReadMode::Text}
15}
16
17/// Struct for BBCode tokenization.
18#[derive(Default)]
19pub struct BBCodeTokenizer {
20	mode: ReadMode,
21	current_instruction: Instruction,
22	instructions: Vec<Instruction>
23}
24impl BBCodeTokenizer {
25	/// Creates a new BBCodeTokenizer
26	pub fn new() -> Self {
27		Default::default()
28	}
29	/// Reads and tokenizes BBCode into individual Instructions.
30	pub fn tokenize(&mut self, bbcode: &str) -> &Vec<Instruction> {
31		let bbcode_chars = bbcode.chars();
32		for character in bbcode_chars {
33			match &self.mode {
34				ReadMode::Text => {self.parse_text(character);},
35				ReadMode::Escape => {self.parse_escape(character);},
36				ReadMode::Tag => {self.parse_tag(character);},
37				ReadMode::TagPrimaryArg => {self.parse_tag_primary_arg(character);},
38				ReadMode::Parabreak => {self.parse_parabreak(character);},
39				ReadMode::Scenebreak => {self.parse_scenebreak(character);},
40			}
41		}
42		self.set_cur_instruction();
43		&self.instructions
44	}
45	/// s characters.
46	fn parse_text(&mut self, character: char) {
47		match character {
48			'\\' => {
49				self.mode = ReadMode::Escape
50			},
51			'[' => {
52				self.set_cur_instruction();
53				self.mode = ReadMode::Tag;
54			},
55			'\n' | '\r' => {
56				self.set_cur_instruction();
57				self.mode = ReadMode::Parabreak;
58			},
59			'>' | '<' | '&' | '"' | '\'' => {
60				let san_char = self.sanitize(character);
61				match self.current_instruction {
62					Instruction::Text(ref mut contents) => {
63						contents.push_str(&san_char);
64					},
65					_ => {
66						self.current_instruction = Instruction::Text(san_char);
67					}
68				}
69			},
70			_ => {
71				match self.current_instruction {
72					Instruction::Text(ref mut contents) => {
73						contents.push(character);
74					},
75					_ => {
76						self.current_instruction = Instruction::Text(character.to_string());
77					}
78				}
79			}
80		}
81	}
82	/// s paragraph breaks.
83	fn parse_parabreak(&mut self, character: char) {
84		match character {
85			'\t' => {
86				self.set_new_instruction(Instruction::Parabreak("\n\t".to_string()));
87				self.mode = ReadMode::Text;
88			},
89			'\n' | '\r' => {
90				self.mode = ReadMode::Scenebreak;
91			},
92			' ' => {},
93			_ => {
94				self.set_new_instruction(Instruction::Linebreak);
95				self.mode = ReadMode::Text;
96				self.parse_text(character);
97			}
98		}
99		
100	}
101	/// s scen breaks (three newlines).
102	fn parse_scenebreak(&mut self, character: char) {
103		match character {
104			'\n' | '\r' => {
105				self.set_new_instruction(Instruction::Scenebreak);
106				self.mode = ReadMode::Text;
107			},
108			' ' => {},
109			_ => {
110				self.set_new_instruction(Instruction::Parabreak("\n\n".to_string()));
111				self.mode = ReadMode::Text;
112				self.parse_text(character);
113			}
114		}
115		
116	}
117	/// s escaped charcters.
118	fn parse_escape(&mut self, character: char) {
119		self.mode = ReadMode::Text;
120		match character {
121			'>' | '<' | '&' | '"' | '\'' | '\\' => {
122				let san_char = self.sanitize(character);
123				match self.current_instruction {
124					Instruction::Tag(ref mut contents, _) => {
125						contents.push_str(&san_char);
126					},
127					_ => {
128						self.current_instruction = Instruction::Text(san_char);
129					}
130				}
131			},
132			_ => {
133				match self.current_instruction {
134					Instruction::Text(ref mut contents) => {
135						contents.push(character);
136					},
137					_ => {
138						self.current_instruction = Instruction::Text(character.to_string());
139					}
140				}
141			}
142		}	
143	}
144	/// s BBCode tags.
145	fn parse_tag(&mut self, character: char) {
146		match character {
147			']' => {
148				self.set_cur_instruction();
149				self.mode = ReadMode::Text;
150			},
151			'=' => {
152				self.mode = ReadMode::TagPrimaryArg;
153			},
154			'>' | '<' | '&' | '"' | '\'' | '\\' => {
155				let san_char = self.sanitize(character);
156				match self.current_instruction {
157					Instruction::Tag(ref mut contents, _) => {
158						contents.push_str(&san_char);
159					},
160					_ => {
161						self.current_instruction = Instruction::Tag(san_char, None);
162					}
163				}
164			},
165			_ => {
166				match self.current_instruction {
167					Instruction::Tag(ref mut contents, _) => {
168						contents.push(character);
169					},
170					_ => {
171						self.current_instruction = Instruction::Tag(character.to_string(), None);
172					}
173				}
174			}
175		}	
176	}
177	/// s BBCode tag arguments.
178	fn parse_tag_primary_arg(&mut self, character: char) {
179		match character {
180			']' => {
181				self.set_cur_instruction();
182				self.mode = ReadMode::Text;
183			},
184			'>' | '<' | '&' | '"' | '\'' | '\\' => {
185				let san_char = self.sanitize(character);
186				match self.current_instruction {
187					Instruction::Tag(ref mut contents, ref mut args) => {
188						match args {
189							Some(ref mut primarg) => {
190								primarg.push_str(&san_char);
191							},
192							None => {
193								self.current_instruction = Instruction::Tag((*contents).to_string(), Some(san_char));
194							}
195						}
196					},
197					_ => {
198						unreachable!();
199					}
200				}
201			},
202			_ => {
203				match self.current_instruction {
204					Instruction::Tag(ref mut contents, ref mut args) => {
205						match args {
206							Some(ref mut primarg) => {
207								primarg.push(character);
208							},
209							None => {
210								self.current_instruction = Instruction::Tag((*contents).to_string(), Some(character.to_string()));
211							}
212						}
213					},
214					_ => {
215						unreachable!();
216					}
217				}
218			}
219		}
220	}
221	/// Adds current instruction to instruction vector and restes current instruction.
222	fn set_cur_instruction(&mut self) {
223		if self.current_instruction != Instruction::Null {
224			self.instructions.push(self.current_instruction.clone());
225			self.current_instruction = Instruction::Null;
226		}
227	}
228	/// Adds a given instruction to instruction vector and resets current instruction.
229	fn set_new_instruction(&mut self, instruction: Instruction) {
230		self.instructions.push(instruction);
231		self.current_instruction = Instruction::Null;
232	}
233	/// Sanitizes characters for HTML.
234	fn sanitize(&mut self, character: char) -> String {
235		match character {
236			'<' => "&lt",
237			'>' => "&gt",
238			'&' => "&amp",
239			'"' => "&quot",
240			'\'' => "&#x27",
241			'\\' => "&#x2F",
242			_ => unreachable!()
243		}.to_string()
244	}
245}