1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
/*!

BBClash is the open-source version of the BBCode compiler being built for [Penclash](https://endahallahan.github.io/Penclash-Splash-Site/). Unlike most implementations, BBClash is **not RegEx-based.** It functions like a compiler, tokenizing, lexing, and then constructing compliant HTML from an AST-like object. This makes it robust and good at handling even improperly-formatted input. 

Our BBCode specification can be found [here](https://github.com/EndaHallahan/BBClash/blob/master/Spec.md).

## General Usage:

```rust
use bbclash::bbcode_to_html;

assert_eq!(bbcode_to_html("I'm [i]italic[/i] and [b]bold![/b]"), 
		"<p>I&#x27m <i>italic</i> and <b>bold!</b></p>");
```

BBClash also comes ready out-of-the-box for use as WASM or with other languages via C bindings.

## Pretty and Ugly Output

BBClash has two main modes of operation: *pretty* and *ugly*. Pretty output uses the `bbcode_to_html` function, and excludes improperly formatted bbcode from the final output:

```rust
use bbclash::bbcode_to_html;

assert_eq!(bbcode_to_html("I'm [colour]missing an argument![/colour]"), 
		"<p>I&#x27m missing an argument!</p>");
```

Ugly uses the `bbcode_to_html_ugly` function, and leaves improperly formatted BBCode tags in the final output as written:

```rust
use bbclash::bbcode_to_html_ugly;

assert_eq!(bbcode_to_html_ugly("I'm [colour]missing an argument![/colour]"), 
		"<p>I&#x27m [colour]missing an argument![/colour]</p>");
```

Note that neither mode arbitrarily strips text in square brackets. This only affects improperly written BBCode tags; `[non tags]` will not be affected.

## Custom Usage:

Because this package was built for an existing application, and because it is performance-focused, BBClash's BBCode implementation is entirely hard-coded. Because of this, it is recommended that you download a local copy and modify it to suit your needs. *Note: currently requires Rust Nightly to build. Relevant issue: [54727](https://github.com/rust-lang/rust/issues/54727)*

Building is as simple as running `$ cargo build`. Tests and benchmarks can be run with `$ cargo test` and `$ cargo bench`, respectively.
*/

#![feature(proc_macro_hygiene)]
extern crate rctree;
extern crate phf;

mod bbcode_tokenizer;
mod bbcode_lexer;
mod html_constructor;

pub use crate::bbcode_tokenizer::BBCodeTokenizer;
pub use crate::bbcode_lexer::BBCodeLexer;
pub use crate::html_constructor::HTMLConstructor;

/// Converts a `&str` of BBCode into a `String` of HTML.
///
/// This is the *pretty* entry point: erroneously written BBCode found in the
/// input is left out of the generated HTML.
///
/// # Examples
///
/// ```
/// use bbclash::bbcode_to_html;
///
/// assert_eq!(bbcode_to_html("I'm [i]italic[/i] and [b]bold![/b]"),
/// 		"<p>I&#x27m <i>italic</i> and <b>bold!</b></p>");
/// ```
#[no_mangle]
pub extern fn bbcode_to_html(input: &str) -> String {
	// NOTE(review): `&str` / `String` are not C-ABI types; confirm how
	// non-Rust callers are expected to use this exported symbol.
	let mut bb_tokenizer = BBCodeTokenizer::new();
	let mut bb_lexer = BBCodeLexer::new();
	// The second argument is `true` here and `false` in the ugly variant;
	// the first is the input length (presumably a size hint — TODO confirm).
	let mut html_builder = HTMLConstructor::new(input.len(), true);

	// Pipeline: tokenize -> lex -> construct HTML.
	let instructions = bb_tokenizer.tokenize(input);
	let ast = bb_lexer.lex(instructions);
	html_builder.construct(ast)
}

/// Converts a `&str` of BBCode into a `String` of HTML.
///
/// This is the *ugly* entry point: erroneously written BBCode found in the
/// input is kept, as written, in the generated HTML.
///
/// # Examples
///
/// ```
/// use bbclash::bbcode_to_html_ugly;
///
/// assert_eq!(bbcode_to_html_ugly("I'm [colour]missing an argument![/colour]"),
/// 		"<p>I&#x27m [colour]missing an argument![/colour]</p>");
/// ```
#[no_mangle]
pub extern fn bbcode_to_html_ugly(input: &str) -> String {
	// NOTE(review): `&str` / `String` are not C-ABI types; confirm how
	// non-Rust callers are expected to use this exported symbol.
	let mut bb_tokenizer = BBCodeTokenizer::new();
	let mut bb_lexer = BBCodeLexer::new();
	// The second argument is `false` here and `true` in the pretty variant;
	// the first is the input length (presumably a size hint — TODO confirm).
	let mut html_builder = HTMLConstructor::new(input.len(), false);

	// Pipeline: tokenize -> lex -> construct HTML.
	let instructions = bb_tokenizer.tokenize(input);
	let ast = bb_lexer.lex(instructions);
	html_builder.construct(ast)
}

/// A single element of a BBCode AST.
///
/// Stores the element's `GroupType`, optionally some accumulated text,
/// optionally a tag argument, and an `is_void` flag.
#[derive(Debug, Clone)]
pub struct ASTElement {
	ele_type: GroupType,
	text_contents: Option<String>,
	argument: Option<String>,
	is_void: bool,
}
impl ASTElement {
	/// Creates a new ASTElement of the given type with no text, no argument,
	/// and `is_void` set to `false`.
	pub fn new(ele_type: GroupType) -> ASTElement {
		ASTElement {
			ele_type,
			text_contents: None,
			argument: None,
			is_void: false,
		}
	}
	/// Sets an ASTElement's type.
	pub fn set_ele_type(&mut self, new_type: GroupType) {
		self.ele_type = new_type;
	}
	/// Gets an immutable reference to an ASTElement's type.
	pub fn ele_type(&self) -> &GroupType {
		&self.ele_type
	}
	/// Sets an ASTElement's is_void field (indicates that the ASTElement does not contain text or children).
	pub fn set_void(&mut self, in_void: bool) {
		self.is_void = in_void;
	}
	/// Gets the value of an ASTElement's is_void field.
	pub fn is_void(&self) -> bool {
		self.is_void
	}
	/// Appends `new_text` to an ASTElement's text, creating the text if there is none yet.
	///
	/// Accepts any `&str`; a `&String` coerces automatically.
	pub fn add_text(&mut self, new_text: &str) {
		// Append in place rather than rebuilding the whole string on every call.
		self.text_contents
			.get_or_insert_with(String::new)
			.push_str(new_text);
	}
	/// Gets whether or not an ASTElement has text.
	pub fn has_text(&self) -> bool {
		self.text_contents.is_some()
	}
	/// Gets an immutable reference to an ASTElement's text_contents.
	pub fn text_contents(&self) -> &Option<String> {
		&self.text_contents
	}
	/// Sets an ASTElement's argument field, replacing any previous value.
	///
	/// Accepts any `&str`; a `&String` coerces automatically.
	pub fn set_arg(&mut self, arg: &str) {
		self.argument = Some(arg.to_owned());
	}
	/// Appends `new_arg` to an ASTElement's argument, creating the argument if there is none yet.
	///
	/// Accepts any `&str`; a `&String` coerces automatically.
	pub fn add_arg(&mut self, new_arg: &str) {
		// Append in place rather than rebuilding the whole string on every call.
		self.argument
			.get_or_insert_with(String::new)
			.push_str(new_arg);
	}
	/// Gets whether or not an ASTElement has an argument.
	///
	/// Only needs shared access, so it can be called through `&ASTElement`.
	pub fn has_arg(&self) -> bool {
		self.argument.is_some()
	}
	/// Gets an immutable reference to an ASTElement's argument field.
	pub fn argument(&self) -> &Option<String> {
		&self.argument
	}
}

/// One unit of tokenizer output.
///
/// `Tag` carries a `String` plus an optional `String`; `Text` and
/// `Parabreak` each carry a `String`; the remaining variants carry no data.
#[derive(Clone, Debug, PartialEq)]
pub enum Instruction {
	Null,
	// Presumably (tag name, optional tag argument) — TODO confirm against the tokenizer.
	Tag(String, Option<String>),
	Text(String),
	Parabreak(String),
	Linebreak,
	Scenebreak,
}

/// The kinds an `ASTElement` can have.
///
/// Every variant is a plain unit variant except `Broken`, which carries a
/// boxed `GroupType` and a `&'static str`.
#[derive(Clone, Debug, PartialEq)]
pub enum GroupType {
	Text,
	Hr,
	Br,
	Bold,
	Strong,
	Italic,
	Emphasis,
	Underline,
	Smallcaps,
	Strikethrough,
	Monospace,
	Superscript,
	Subscript,
	Spoiler,
	Colour,
	Url,
	Email,
	Opacity,
	Size,
	Center,
	Right,
	Image,
	Quote,
	Footnote,
	Indent,
	Pre,
	PreLine,
	Header,
	Figure,
	List,
	ListItem,
	Embed,
	Code,
	CodeBlock,
	// Icon, (variant currently commented out)
	Math,
	MathBlock,
	Table,
	TableRow,
	TableData,
	TableHeader,
	Paragraph,
	Scenebreak,
	Null,
	// Presumably the type the malformed tag was meant to be, plus its tag text — TODO confirm against the lexer.
	Broken(Box<GroupType>, &'static str),
	Document,
	Anchor,
}

/// Kinds of argument for Instructions.
///
/// Each variant wraps a single `String`.
#[derive(Clone, Debug, PartialEq)]
pub enum Argument {
	Colour(String),
	Url(String),
	Quote(String),
}