bbscope/
lib.rs

1use std::sync::Arc;
2
3use regex::{Regex, Captures, Error};
4
5// Carlos Sanchez - 2022-12-05
6// - For SBS
7
8//So:
9//- early closures close all tags in the previous scope
10//- ignore unmatched closing tags
11//- close all unclosed tags at the end
12//- block-level tags consume the first newline after the open and close tags
13
/// Id of the matcher that turns bare `http(s)://` urls into links.
static AUTOLINKID: &str = "autolinker";
/// Id of the matcher that swallows characters which must never reach the output.
static CONSUMETEXTID: &str = "consumetext";
/// Id of the matcher that passes runs of ordinary text through.
static NORMALTEXTID: &str = "normaltext";
/// Id of the matcher that converts a newline into `<br>`.
static BRNEWLINEID: &str = "convertnewlinebr";
/// Id of the matcher that passes newlines through untouched.
static NEWLINEID: &str = "newline";

/// Tag names enabled by the default configuration. The names are spliced into regexes, which is
/// why the list item tag is written with an escaped `*`.
const BASICTAGS: &[&str] = &[
    "b", "i", "sup", "sub", "u", "s", "list", r"\*", "url", "img",
];
/// Extra tag names, on top of the basic ones, enabled by the extended configuration.
const EXTENDEDTAGS: &[&str] = &[
    "h1", "h2", "h3", "anchor", "quote", "spoiler", "icode", "code", "youtube",
];
26
27/// The type for your emit closure which will take the open tag capture, body, and close tag capture and 
28/// output whatever you want. used with 
29pub type EmitScope = Arc<dyn Fn(Option<Captures>, &str, Option<Captures>)->String + Send + Sync>; 
30/// The type for your emit closure when you're doing basic regex replacement with [`MatchType::Simple`]
31pub type EmitSimple = Arc<dyn Fn(Captures)->String + Send + Sync>; //Inefficient to use String but we have to in order to support replacements etc
32
33/// Information about a scoped tag with open and close elements. This gives you the power to craft
34/// many kinds of markup, not just bbcode, and have it understand scope
35pub struct ScopeInfo {
36    /// A list of [`MatchInfo`] ids for tags which are allowed to be parsed within the body of 
37    /// this scope. If [`None`], any tag is allowed (default)
38    pub only: Option<Vec<&'static str>>,
39    /// Whether or not this scope automatically closes previous scopes of the same type. Generally
40    /// this isn't how tags work: doing [b][b][b]EXTRA BOLD[/b][/b][/b] produces triple bold text.
41    /// However, elements like [*] (list item) benefit from closing the previous scope if it's the
42    /// same type
43    pub double_closes: bool,
44    /// The core of the parsing system: your provided closure which transforms the pre-parsed data
45    /// given into HTML (or whatever)
46    pub emit: EmitScope, 
47}
48
49impl Default for ScopeInfo {
50    /// Create a default scope info with an emitter that only outputs the body of the tags
51    /// (you probably don't want this!)
52    fn default() -> Self {
53        Self {
54            only: None,
55            double_closes: false,
56            emit: Arc::new(|_o, b, _c| String::from(b))
57        }
58    }
59}
60
61impl ScopeInfo {
62    /// Create a basic scope info using only an emiter (a highly common case, you probably want this)
63    pub fn basic(emitter: EmitScope) -> Self {
64        let mut result = Self::default();
65        result.emit = emitter;
66        result
67    }
68}
69
70/// This defines how scopes and whole blocks of text move into the output. Basically: is your matcher
71/// using scope or not? If using scope, you need to define an open and close [`MatchInfo`]. If not, 
72/// just use the [`Simple`] option
73pub enum MatchType { 
74    /// A match type that requires no scoping rules: just a simple regex replacement (or whatever replacement you want)
75    Simple(EmitSimple), 
76    /// The match should expect an open tag, which increases scope and performs open scope rules
77    Open(Arc<ScopeInfo>), 
78    /// The match should expect a closing tag, which decreases scope and performs close scope rules
79    Close
80}
81
82/// Definition for a block level matcher. Should always be readonly, it is just a definition. 
83/// Not necessarily a scoped element, could define eating garbage, passing normal text through, etc.
84/// It's all up to the 'match_type'
85pub struct MatchInfo {
86    /// A unique identifier for you to reference in things like [`ScopeInfo.only`]
87    pub id: &'static str,
88    /// The regex for this parse item. Most likely an open or close tag, but you can do anything you want
89    pub regex : Regex,
90    /// The type of match, indicates whether to open or close a scope (or perform no scoping)
91    pub match_type: MatchType,
92}
93
94//For the following section, it is assumed the entire scoper and all elements within will have the same lifetime
95//as the calling function, which also houses the input string.
96
97/// A scope for bbcode tags. Scopes increase and decrease as tags are opened and closed. Scopes are placed on a stack
98/// to aid with auto-closing tags
99struct BBScope<'a> {
100    /// Id of the [`MatchInfo`] which produced this scope. Used for tracking, double_closes detection, etc
101    id: &'static str,
102    /// The scope information from the [`MatchInfo`] which describes the rules of this current scope. Should
103    /// always be a reference, as it's just informational
104    info: Arc<ScopeInfo>,
105    /// The regex capture for the open tag. We save this for later, when we finally emit the completed scope.
106    /// Your [`EmitScope`] closure in [`ScopeInfo`] receives this as the first argument
107    open_tag_capture: Option<Captures<'a>>,
108    /// The currently retained body of this scope. If the scope is on top, this is where general text goes
109    /// before the closing tag is detected. Your [`EmitScope`] closure in [`ScopeInfo`] receives this as the 
110    /// second argument
111    body: String, 
112}
113
114impl BBScope<'_> {
115    /// Is the given element `info` (such as bold/italic/url) allowed to exist inside this scope
116    fn is_allowed(&self, info: &MatchInfo) -> bool {
117        if let Some(only) = &self.info.only {
118            //A special case: your own closing tag is always allowed of course!
119            if self.id == info.id && matches!(info.match_type, MatchType::Close) {
120                return true;
121            } 
122            for &only_str in only.iter() {
123                //If the only_str starts with attr:, we only allow this inner tag if the current scope
124                //tag has an attr (attribute).
125                if only_str.starts_with("attr:") {
126                    let only_name = &only_str[5..];
127                    if only_name == info.id {
128                        if let Some(ref capture) = self.open_tag_capture {
129                            if capture.name("attr").is_some() {
130                                return true;
131                            }
132                        }
133                    }
134                }
135                else if only_str == info.id {
136                    return true;
137                }
138            }
139            //Nothing was found, you're not allowed
140            return false;
141        }
142        //If there's no only field, allow everything
143        else {
144            true
145        }
146    }
147    /// Does the opener of this element with given id (such as bold/italic/etc) close this scope?
148    fn closes(&self, id: &str) -> bool {
149        self.id == id && self.info.double_closes
150    }
151    /// Consume the scope and emit the text. Only do this when you're the top scope on the stack!
152    fn emit(self, close_captures: Option<Captures>) -> String {
153        let emitter = &self.info.emit;
154        emitter(self.open_tag_capture, &self.body, close_captures)
155    }
156}
157
158/// A container with functions to help manage scopes. It doesn't understand what bbcode is or how the tags should
159/// be formatted, it just handles pushing and popping scopes on the stack
160struct BBScoper<'a> {
161    scopes : Vec<BBScope<'a>>
162}
163
164/// Everything inside BBScoper expects to live as long as the object itself. So everything is 'a
165impl<'a> BBScoper<'a> 
166{
167    /// Create a new scoper service with the starting scope already applied. The starting scope
168    /// is the catch-all for the final string to output, it must exist
169    fn new() -> Self { 
170        Self { 
171            scopes: vec![
172                BBScope { 
173                    id: "STARTING_SCOPE", 
174                    info: Arc::new(ScopeInfo::default()) ,
175                    open_tag_capture: None, 
176                    body: String::new() 
177                }
178            ]
179        }
180    }
181
182    /// Given the current state of our scopes, is the element with the given id (such as bold/italic/url)
183    /// allowed to exist as the next element inside us right now? Basically: can the current scope accept this element
184    fn is_allowed(&self, id: &MatchInfo) -> bool {
185        self.scopes.last().unwrap().is_allowed(id)
186    }
187
188    /// Remove the top scope, emitting the output into the scope below it
189    fn close_last(&mut self, close_tag: Option<Captures>) {
190        if let Some(scope) = self.scopes.pop() {
191            let body = scope.emit(close_tag); //this consumes the scope, which is fine
192            self.add_text(&body);
193        }
194        else {
195            panic!("BBScoper::close_last HOW DID THIS HAPPEN? There were scopes from .last but pop returned none!");
196        }
197    }
198
199    /// Append a string to the current top scope, useful if you're just passing through normal user text
200    fn add_text(&mut self, text: &str) {
201        // I don't know how to do this the right way, I'm sorry
202        let mut last_scope = self.scopes.pop().unwrap();
203        last_scope.body.push_str(text);
204        self.scopes.push(last_scope);
205    }
206
207    /// Append a single char to the current top scope
208    fn add_char(&mut self, ch: char) {
209        let mut last_scope = self.scopes.pop().unwrap();
210        last_scope.body.push(ch);
211        self.scopes.push(last_scope);
212    }
213
214    /// Add a scope, which may close some existing scopes. The closed scopes are returned in display order.
215    /// NOTE: the added infos must live as long as the scope container!
216    fn add_scope(&mut self, scope: BBScope<'a>) { 
217        //here we assume all taginfos have unique tags because why wouldn't they
218        if let Some(topinfo) = self.scopes.last() {
219            //oh the thing on top is the same, if we don't want that, close it.
220            if topinfo.closes(scope.id) { 
221                self.close_last(None);
222            }
223        }
224
225        self.scopes.push(scope);
226    }
227    
228    /// Close the given scope, which should return the scopes that got closed (including the self).
229    /// If no scope could be found, the vector is empty
230    fn close_scope(&mut self, id: &'static str) -> usize { 
231        let mut scope_count = 0;
232        let mut tag_found : bool = false; 
233
234        //Scan backwards, counting. Stop when you find a matching tag. This lets us know the open child scopes
235        //that were not closed
236        for scope in self.scopes.iter().rev() {
237            scope_count += 1;
238            if id == scope.id {
239                //tag_found = Some(&scope.body);
240                tag_found = true;
241                break;
242            }
243        }
244
245        //Return all the scopes from the end to the found closed scope. Oh and also remove them
246        if tag_found { 
247            for _i in 0..scope_count {
248                self.close_last(None); 
249            }
250            scope_count
251        }
252        else {
253            0 //No scopes closed
254        }
255    }
256
257    /// Consume the scope system while dumping the rest of the scopes in the right order for display
258    fn dump_remaining(mut self) -> String { 
259        while self.scopes.len() > 1 {
260            self.close_last(None)
261        }
262        self.scopes.pop().unwrap().emit(None)
263    }
264}
265
266
267// ------------------------------
268// *     MAIN FUNCTIONALITY    *
269// ------------------------------
270
/// The ways in which you can configure the default link 'target' behavior.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum BBCodeLinkTarget {
    /// Do not generate a 'target' attribute
    None,
    /// Generate a `target="_blank"` attribute (the default)
    #[default]
    Blank,
}
280
281/// Configuration for tag generation. Generally not necessary, as you can just
282/// generate your own tags with more ease and more configuration than this, but
283/// this is useful for quick and common modifications to normal tag generation.
284#[derive(Clone, Debug)]
285pub struct BBCodeTagConfig {
286    pub link_target: BBCodeLinkTarget,
287    pub img_in_url: bool,
288    pub newline_to_br: bool,
289    pub accepted_tags: Vec<String>,
290    //pub finalize_matchers: Option<Vec<MatchInfo>>
291}
292
293impl Default for BBCodeTagConfig {
294    /// Produce a default configuration, which includes the basic set of tags and 
295    /// (hopefully) expected defaults
296    fn default() -> Self {
297        Self {
298            link_target: BBCodeLinkTarget::default(),
299            img_in_url: true,
300            newline_to_br: true,
301            accepted_tags: BASICTAGS.iter().map(|t| t.to_string()).collect(),
302            //finalize_matchers: None
303        }
304    }
305}
306
307impl BBCodeTagConfig {
308    /// Produce a default configuration but include the extended tags
309    pub fn extended() -> Self {
310        let mut config = BBCodeTagConfig::default();
311        let mut extags : Vec<String> = EXTENDEDTAGS.iter().map(|t| t.to_string()).collect();
312        config.accepted_tags.append(&mut extags);
313        config
314    }
315}
316
317/// The main bbcode system. You create this to parse bbcode! Inexpensive clones,
318/// since fields are all reference counted.
319#[derive(Clone)] //All the members implement clone
320pub struct BBCode {
321    /// Supply this!
322    pub matchers: Arc<Vec<MatchInfo>>, //These are SOMETIMES processed (based on context)
323
324    #[cfg(feature = "profiling")]
325    pub profiler: onestop::OneList<onestop::OneDuration>
326}
327
328impl BBCode 
329{
330    /// Get a default bbcode parser. Should hopefully have reasonable defaults!
331    pub fn default() -> Result<Self, Error> {
332        Ok(Self::from_config(BBCodeTagConfig::default(), None)?)
333    }
334
335    /// Create a BBCode parser from the given list of matchers. If you're building a fully custom set of tags, 
336    /// use this endpoint
337    pub fn from_matchers(matchers: Vec<MatchInfo>) -> Self {
338        Self {
339            matchers: Arc::new(matchers),
340            #[cfg(feature = "profiling")]
341            profiler: onestop::OneList::<onestop::OneDuration>::new()
342        }
343    }
344
345    /// Create a BBCode parser from a config, using standard tags (plus any extras specified in the config). If you
346    /// want a tweaked BBCode parser but based off reasonable defaults, use this. `BBCode::default()` is the same
347    /// as calling this with `BBCodeTagConfig::default()`. You can also optionally pass in more tags you wish to support
348    /// (this is done because the order of tag matchers is important, and the "standard" configuration requires some internal
349    /// matchers to come after yours)
350    pub fn from_config(config: BBCodeTagConfig, additional_matchers: Option<Vec<MatchInfo>>) -> Result<Self, Error>
351    {
352        let mut matches : Vec<MatchInfo> = Vec::new(); 
353
354        //This is an optimization: any large block of characters that has no meaning in bbcode can go straight through.
355        matches.push(MatchInfo {
356            id: NORMALTEXTID,
357            //We use h to catch ourselves on https. this unfortunately breaks up large sections of text into much
358            //smaller ones, but it should be ok... I don't know. My parser is stupid lol
359            regex: Regex::new(r#"^[^\[\n\rh]+"#)?, 
360            match_type : MatchType::Simple(Arc::new(|c| String::from(html_escape::encode_quoted_attribute(&c[0]))))
361        });
362
363        //Throw away these characters
364        matches.push(MatchInfo { 
365            id: CONSUMETEXTID,
366            regex: Regex::new(r#"^[\r]+"#)?, 
367            match_type: MatchType::Simple(Arc::new(|_c| String::new()))
368        });
369
370        let target_attr = match config.link_target {
371            BBCodeLinkTarget::Blank => "target=\"_blank\"",
372            BBCodeLinkTarget::None => ""
373        };
374        let target_attr_c1 = target_attr.clone();
375
376        let mut url_only = Self::plaintext_ids();
377        if config.img_in_url {
378            url_only.push("attr:img")
379        }
380
381        let accepted_tags : Vec<&str> = config.accepted_tags.iter().map(|t| t.as_str()).collect();
382
383        macro_rules! addmatch {
384            ($name:literal, $value:expr) => {
385                addmatch!($name, $value, None, None)
386            };
387            ($name:literal, $value:expr, $open:expr, $close:expr) => {
388                if accepted_tags.contains(&$name) {
389                    Self::add_tagmatcher(&mut matches, $name, $value, $open, $close)?
390                }
391            }
392        }
393
394        #[allow(unused_variables)]
395        {
396            //Basic
397            addmatch!("b", ScopeInfo::basic(Arc::new(|o,b,c| format!("<b>{b}</b>"))));
398            addmatch!("i", ScopeInfo::basic(Arc::new(|o,b,c| format!("<i>{b}</i>"))));
399            addmatch!("sup", ScopeInfo::basic(Arc::new(|o,b,c| format!("<sup>{b}</sup>"))));
400            addmatch!("sub", ScopeInfo::basic(Arc::new(|o,b,c| format!("<sub>{b}</sub>"))));
401            addmatch!("u", ScopeInfo::basic(Arc::new(|o,b,c| format!("<u>{b}</u>"))));
402            addmatch!("s", ScopeInfo::basic(Arc::new(|o,b,c| format!("<s>{b}</s>"))));
403            addmatch!("list", ScopeInfo::basic(Arc::new(|o,b,c| format!("<ul>{b}</ul>"))), Some((0,1)), Some((0,1)));
404            //There's a [list=1] thing, wonder how to do that. It's nonstandard, our list format is entirely nonstandard
405            addmatch!(r"\*", ScopeInfo { 
406                only: None, double_closes: true, emit: Arc::new(|o,b,c| format!("<li>{b}</li>"))
407            }, Some((1,0)), Some((1,0)));
408            addmatch!(r"url", ScopeInfo { 
409                only: Some(url_only),
410                double_closes: false, 
411                emit: Arc::new(move |o,b,c| format!(r#"<a href="{}" {}>{}</a>"#, Self::attr_or_body(&o,b), target_attr, b) )
412            });
413            addmatch!(r"img", ScopeInfo { 
414                only: Some(Self::plaintext_ids()),
415                double_closes: false, 
416                emit: Arc::new(|o,b,c| format!(r#"<img src="{}">"#, Self::attr_or_body(&o,b)) )
417            });
418
419            //Extras
420            addmatch!("h1", ScopeInfo::basic(Arc::new(|_o,b,_c| format!("<h1>{}</h1>",b))), Some((0,1)), Some((1,1)));
421            addmatch!("h2", ScopeInfo::basic(Arc::new(|_o,b,_c| format!("<h2>{}</h2>",b))), Some((0,1)), Some((1,1)));
422            addmatch!("h3", ScopeInfo::basic(Arc::new(|_o,b,_c| format!("<h3>{}</h3>",b))), Some((0,1)), Some((1,1)));
423            addmatch!("anchor", ScopeInfo::basic(
424                Arc::new(|o,b,_c| format!(r##"<a{} href="#{}">{}</a>"##, Self::attr_or_nothing(&o,"name"), Self::attr_or_body(&o,""), b))));
425            addmatch!("quote", ScopeInfo::basic(
426                Arc::new(|o,b,_c| format!(r#"<blockquote{}>{}</blockquote>"#, Self::attr_or_nothing(&o,"cite"), b))), Some((0,1)), Some((0,1)));
427            addmatch!("spoiler", ScopeInfo::basic(
428                Arc::new(|o,b,_c| format!(r#"<details class="spoiler">{}{}</details>"#, Self::tag_or_something(&o,"summary", Some("Spoiler")), b))));
429            addmatch!("icode", ScopeInfo {
430                only: Some(Self::plaintext_ids()),
431                double_closes: false,
432                emit: Arc::new(|_o,b,_c| format!(r#"<span class="icode">{b}</span>"#))
433            });
434            addmatch!("code", ScopeInfo {
435                only: Some(Self::plaintext_ids()),
436                double_closes: false,
437                emit: Arc::new(|o,b,_c| format!(r#"<pre class="code"{}>{}</pre>"#, Self::attr_or_nothing(&o, "data-code"), b) )
438            }, Some((0,1)), Some((0,1)));
439            addmatch!("youtube", ScopeInfo {
440                only: Some(Self::plaintext_ids()),
441                double_closes: false,
442                emit: Arc::new(|o,b,_c| format!(r#"<a href={} target="_blank" data-youtube>{}</a>"#, Self::attr_or_body(&o, b), b) )
443            });
444        }
445
446        //WARN: I had to put the newline matcher at the end because of tag newline consumption! This makes configuration
447        //and adding new tags a lot more complicated (meaning the user has to pass their matchers in when configuring
448        //the parser and they can't be changed)! Unfortunate!
449        if let Some(m) = additional_matchers {
450            matches.extend(m);
451        }
452
453        //The ordering is important! If the user requested <br> output, that needs to come first, so it supercedes
454        //the regular newline consumer! But we must include the newline consumer for verbatim sections (like code)
455        if config.newline_to_br {
456            matches.push(MatchInfo { 
457                id: BRNEWLINEID, 
458                regex: Regex::new(r#"^\n"#)?, 
459                match_type: MatchType::Simple(Arc::new(|_c| String::from("<br>")))
460            })
461        }
462        matches.push(MatchInfo {  //This passes through newlines directly. A catch for when <br> isn't allowed
463            id: NEWLINEID, 
464            regex: Regex::new(r#"^[\n]+"#)?, 
465            match_type: MatchType::Simple(Arc::new(|c| String::from(&c[0])))
466        });
467
468        //This new autolinker is taken from 12 since it works better
469        let url_chars = r#"[-a-zA-Z0-9_/%&=#+~@$*'!?,.;:]*"#;
470        let end_chars = r#"[-a-zA-Z0-9_/%&=#+~@$*']"#;
471        let autolink_regex = format!("^https?://{0}{1}([(]{0}[)]({0}{1})?)?", url_chars, end_chars);
472
473        //Don't forget about autolinking! This is a crappy autolinker and it doesn't matter too much!
474        matches.push(MatchInfo { 
475            id: AUTOLINKID,
476            //characters taken from google's page https://developers.google.com/maps/url-encoding
477            //NOTE: removed certain characters from autolinking because they SUCK
478            regex: Regex::new(&autolink_regex)?,
479            match_type: MatchType::Simple(Arc::new(move |c| format!(r#"<a href="{0}" {1}>{0}</a>"#, &c[0], target_attr_c1)))
480        });
481
482        Ok(Self::from_matchers(matches))
483    }
484
485    /// Convert the current bbcode instance to one which consumes all tags it used to parse. The raw text
486    /// SHOULD be left untouched (I think?)
487    pub fn to_consumer(&mut self) 
488    {
489        let new_matchers : Vec<MatchInfo> = 
490            self.matchers.iter().map(|m| 
491            { 
492                match &m.match_type {
493                    MatchType::Open(_) | MatchType::Close => {
494                        MatchInfo {
495                            id: m.id,
496                            regex: m.regex.clone(),
497                            match_type: MatchType::Simple(Arc::new(|_| String::new()))
498                        }
499                    },
500                    MatchType::Simple(f) => {
501                        MatchInfo {
502                            id: m.id,
503                            regex: m.regex.clone(),
504                            match_type: MatchType::Simple(f.clone())
505                        }
506                    }
507                }
508            }).collect();
509        self.matchers = Arc::new(new_matchers);
510    }
511
512    /// Produce the two basic regexes (open and close) for bbcode tags
513    pub fn get_tagregex(tag: &'static str, open_consume: Option<(i32,i32)>, close_consume: Option<(i32,i32)>) -> (String, String) 
514    {
515        let pre_openchomp; let post_openchomp; let pre_closechomp; let post_closechomp;
516        match open_consume {
517            Some((pre, post)) => {
518                pre_openchomp = format!("(?:\r?\n){{0,{}}}", pre);
519                post_openchomp = format!("(?:\r?\n){{0,{}}}", post);
520            },
521            None => {
522                pre_openchomp = String::new();
523                post_openchomp = String::new();
524            }
525        }
526        match close_consume {
527            Some((pre, post)) => {
528                pre_closechomp = format!("(?:\r?\n){{0,{}}}", pre);
529                post_closechomp = format!("(?:\r?\n){{0,{}}}", post);
530            },
531            None => {
532                pre_closechomp = String::new();
533                post_closechomp = String::new();
534            }
535        }
536        let open_tag = format!(r#"^{0}\[{1}((?:[ \t]+{1})?=(?P<attr>[^\]\n]*))?\]{2}"#, pre_openchomp, Self::tag_insensitive(tag), post_openchomp);
537        let close_tag = format!(r#"^{}\[/{}\]{}"#, pre_closechomp, Self::tag_insensitive(tag), post_closechomp);
538        (open_tag, close_tag)
539    }
540
541    /// Add the open and close matches to the given vector for the given tag (you must construct ScopeInfo yourself). 
542    /// open_consume and close_consume are the amount of newlines to take before and after the open and close tag
543    pub fn add_tagmatcher(matchers: &mut Vec<MatchInfo>, tag: &'static str, info: ScopeInfo, open_consume: Option<(i32,i32)>, close_consume: Option<(i32,i32)>) -> Result<(), Error> { //open_regex: String, close_regex: String) -> Result<(), Error> {
544        let (open_tag, close_tag) = Self::get_tagregex(tag, open_consume, close_consume);
545        matchers.push(MatchInfo { 
546            id: tag, 
547            regex: Regex::new(&open_tag)?, 
548            match_type: MatchType::Open(Arc::new(info))
549        });
550        matchers.push(MatchInfo { 
551            id: tag, 
552            regex: Regex::new(&close_tag)?, 
553            match_type: MatchType::Close,
554        });
555        Ok(())
556    }
557
558    /// This is to avoid the unicode requirement, which we don't need to check simple ascii tags
559    fn tag_insensitive(tag: &str) -> String {
560        let mut result = String::with_capacity(tag.len() * 4);
561        let mut skip = 0;
562        for c in tag.to_ascii_lowercase().chars() {
563            if c == '\\' {
564                skip = 2;
565            }
566            if skip > 0 {
567                skip -= 1;
568                result.push(c);
569                continue;
570            }
571            result.push_str("[");
572            result.push(c);
573            result.push(c.to_ascii_uppercase());
574            result.push_str("]");
575        }
576        result
577    }
578    
579    fn attr_or_body(opener: &Option<Captures>, body: &str) -> String {
580        if let Some(opener) = opener { 
581            if let Some(group) = opener.name("attr") {
582                return String::from(html_escape::encode_quoted_attribute(group.as_str()));
583            }
584        }
585        return String::from(body);
586    }
587
588    fn attr_or_nothing(opener: &Option<Captures>, name: &str) -> String {
589        if let Some(opener) = opener {
590            if let Some(group) = opener.name("attr") {
591                //Note: WE insert the space!
592                return format!(" {}=\"{}\"", name, html_escape::encode_quoted_attribute(group.as_str()));
593            }
594        }
595        return String::new();
596    }
597
598    fn tag_or_something(opener: &Option<Captures>, tag: &str, something: Option<&str>) -> String {
599        if let Some(opener) = opener {
600            if let Some(group) = opener.name("attr") {
601                //Note: WE insert the space!
602                return format!("<{0}>{1}</{0}>", tag, html_escape::encode_quoted_attribute(group.as_str()));
603            }
604        }
605        if let Some(something) = something {
606            return format!("<{0}>{1}</{0}>", tag, html_escape::encode_quoted_attribute(something));
607        }
608        return String::new();
609    }
610
611    pub fn plaintext_ids() -> Vec<&'static str> {
612        vec![NORMALTEXTID, CONSUMETEXTID]
613    }
614
615    /// Main function! You call this to parse your raw bbcode! It also escapes html stuff so it can
616    /// be used raw!  Current version keeps newlines as-is and it's expected you use pre-wrap, later
617    /// there may be modes for more standard implementations
618    pub fn parse(&self, input: &str) -> String 
619    {
620        //Because of utf-8, it's better to just use regex directly all the time?
621        let mut slice = &input[0..]; //Not necessary to be this explicit ofc
622
623        //Only 'Taginfo' can create scope, so don't worry about "DirectReplace" types
624        let mut scoper = BBScoper::new();
625
626        //While there is string left, keep checking against all the regex. Remove some regex
627        //if the current scope is a meanie
628        while slice.len() > 0
629        {
630            let mut matched_info : Option<&MatchInfo> = None;
631
632            //figure out which next element matches (if any). This is the terrible optimization part, but
633            //these are such small state machines with nothing too crazy that I think it's fine.... maybe.
634            //Especially since they all start at the start of the string
635            for matchinfo in self.matchers.iter() {
636                if !scoper.is_allowed(matchinfo) {
637                    continue;
638                }
639                else if matchinfo.regex.is_match(slice) {
640                    matched_info = Some(matchinfo);
641                    break;
642                }
643            }
644
645            //SOMETHING matched, which means we do something special to consume the output
646            if let Some(tagdo) = matched_info 
647            {
648                //There should only be one but whatever
649                for captures in tagdo.regex.captures_iter(slice) {
650                    slice = &slice[captures[0].len()..];
651                    match &tagdo.match_type {
652                        MatchType::Simple(closure) => {
653                            scoper.add_text(&closure(captures));
654                        }
655                        MatchType::Open(info) => {
656                            //Need to enter a scope. Remember where the beginning of this scope is just in case we need it
657                            let new_scope = BBScope {
658                                id: tagdo.id,
659                                info: info.clone(),
660                                open_tag_capture: Some(captures),
661                                body: String::new()
662                            };
663                            scoper.add_scope(new_scope);
664                        },
665                        MatchType::Close => { 
666                            //Attempt to close the given scope. The scoper will return all the actual scopes
667                            //that were closed, which we can dump
668                            scoper.close_scope(tagdo.id);
669                        }
670                    }
671                }
672            }
673            else  //Nothing matched, so we just consume the next character. This should be very rare
674            {
675                //just move forward and emit the char. Note that the slice is in bytes, but the char
676                //is a unicode scalar that could be up to 4 bytes, so we need to know how many 'bytes'
677                //we just popped off
678                if let Some(ch) = slice.chars().next() {
679                    scoper.add_char(ch);
680                    slice = &slice[ch.len_utf8()..];
681                }
682                else {
683                    println!("In BBCode::parse, there were no more characters but there were leftover bytes!");
684                    break;
685                }
686            }
687        }
688
689        scoper.dump_remaining()
690    }
691
692    /// This MAY OR MAY NOT profile, depending on your featureset!
693    pub fn parse_profiled_opt(&mut self, input: &str, _name: String) -> String 
694    {
695        #[cfg(feature = "profiling")]
696        {
697            let mut profile = onestop::OneDuration::new(_name);
698            let result = self.parse(input);
699            profile.finish();
700            self.profiler.add(profile);
701            result
702        }
703
704        #[cfg(not(feature = "profiling"))]
705        return self.parse(input);
706    }
707
708}
709
710
711// ----------------------------
712// *         TESTS           
713// ----------------------------
714
715#[cfg(test)]
716mod tests {
717    use super::*;
718
719    macro_rules! bbtest_basics {
720        ($($name:ident: $value:expr;)*) => {
721        $(
722            #[test]
723            fn $name() {
724                let bbcode = BBCode::default().unwrap(); //BBCode::from_matchers(BBCode::basics().unwrap());
725                let (input, expected) = $value;
726                assert_eq!(bbcode.parse(input), expected);
727            }
728        )*
729        }
730    }
731
732    macro_rules! bbtest_nondefaults{
733        ($($name:ident: $value:expr;)*) => {
734        $(
735            #[test]
736            fn $name() {
737                let mut config = BBCodeTagConfig::default();
738                config.link_target = BBCodeLinkTarget::None;
739                config.img_in_url = false;
740                config.newline_to_br = false;
741                config.accepted_tags = vec![String::from("b"), String::from("u"), String::from("code"), String::from("url"), String::from("img")];
742                let bbcode = BBCode::from_config(config, None).unwrap(); 
743                let (input, expected) = $value;
744                assert_eq!(bbcode.parse(input), expected);
745            }
746        )*
747        }
748    }
749
750    macro_rules! bbtest_extras {
751        ($($name:ident: $value:expr;)*) => {
752        $(
753            #[test]
754            fn $name() {
755                let mut matchers = Vec::<MatchInfo>::new(); // A list of NEW matchers we'll pass to from_config
756                let color_emitter : EmitScope = Arc::new(|open_capture,body,_c| {
757                    let color = open_capture.unwrap().name("attr").unwrap().as_str();
758                    format!(r#"<span style="color:{}">{}</span>"#, color, body)
759                });
760                BBCode::add_tagmatcher(&mut matchers, "color", ScopeInfo::basic(color_emitter), None, None).unwrap();
761                let bbcode = BBCode::from_config(BBCodeTagConfig::extended(), Some(matchers)).unwrap();
762                let (input, expected) = $value;
763                assert_eq!(bbcode.parse(input), expected);
764            }
765        )*
766        }
767    }
768
769    macro_rules! bbtest_consumer {
770        ($($name:ident: $value:expr;)*) => {
771        $(
772            #[test]
773            fn $name() {
774                let mut bbcode = BBCode::from_config(BBCodeTagConfig::extended(), None).unwrap();
775                bbcode.to_consumer();
776                let (input, expected) = $value;
777                assert_eq!(bbcode.parse(input), expected);
778            }
779        )*
780        }
781    }
782
783    #[test]
784    fn build_init() {
785        //This shouldn't fail?
786        let _bbcode = BBCode::default().unwrap();
787    }
788
789    //This isn't really a unit test but whatever
790    #[cfg(feature = "bigtest")]
791    #[test]
792    fn performance_issues() 
793    {
794        use pretty_assertions::{assert_eq};
795
796        let bbcode = BBCode::from_config(BBCodeTagConfig::extended(), None).unwrap();
797
798        let testdir = "bigtests";
799        let entries = std::fs::read_dir(testdir).unwrap();
800        let mut checks: Vec<(String,String,String)> = Vec::new();
801        for entry in entries 
802        {
803            let entry = entry.unwrap();
804            let path = entry.path();
805            let metadata = std::fs::metadata(&path).unwrap();
806
807            //Only look for files
808            if metadata.is_file() {
809                let base_text = std::fs::read_to_string(&path).unwrap();
810                let parse_path = std::path::Path::new(testdir).join("parsed").join(path.file_name().unwrap()); 
811                let parse_text = std::fs::read_to_string(&parse_path).unwrap();
812                checks.push((base_text, parse_text, String::from(path.file_name().unwrap().to_str().unwrap())));
813                println!("Found test file: {:?}", path);
814            }
815        }
816        println!("Total tests: {}", checks.len());
817        let start = std::time::Instant::now();
818        for (raw, parsed, path) in checks {
819            let test_start = start.elapsed();
820            let result = bbcode.parse(&raw);
821            let test_end = start.elapsed();
822            assert_eq!(result, parsed);
823            println!(" Test '{}' : {:?}", path, test_end - test_start);
824        }
825        let elapsed = start.elapsed();
826        println!("Parse total: {:?}", elapsed);
827    }
828
829    #[cfg(feature = "bigtest")]
830    #[test] //Not really a unit test but whatever
831    fn benchmark_10000() {
832        let bbcode = BBCode::from_config(BBCodeTagConfig::extended(), None).unwrap();
833        let parselem = vec![
834            ("it's a %CRAZY% <world> ๐Ÿ’™=\"yeah\" ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ>>done", 
835             "it&#x27;s a %CRAZY% &lt;world&gt; ๐Ÿ’™=&quot;yeah&quot; ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ&gt;&gt;done"),
836            ("[][[][6][a[ab]c[i]italic[but][][* not] 8[]]][", "[][[][6][a[ab]c<i>italic[but][][* not] 8[]]][</i>"),
837            ("[url]this[b]is[/b]a no-no[i][/url]", r#"<a href="this[b]is[/b]a no-no[i]" target="_blank">this[b]is[/b]a no-no[i]</a>"#),
838            ("[img=https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png]abc 123[/img]", r#"<img src="https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png">"#),
839            ("[spoiler]this[b]is empty[/spoiler]", r#"<details class="spoiler"><summary>Spoiler</summary>this<b>is empty</b></details>"#)
840        ];
841
842        let start = std::time::Instant::now();
843        for i in 0..10000 {
844            if let Some((input, output)) = parselem.get(i % parselem.len()) {
845                if bbcode.parse(*input) != *output {
846                    panic!("Hang on, bbcode isn't working!");
847                }
848            }
849            else {
850                panic!("WHAT? INDEX OUT OF BOUNDS??");
851            }
852        }
853        let elapsed = start.elapsed();
854        println!("10000 iterations took: {:?}", elapsed);
855    }
856
    //Cases for the default parser. Every entry has the form `test_name: (input, expected);`
    //and is expanded by bbtest_basics! into its own #[test], which parses `input` with the
    //parser from BBCode::default() and asserts the result equals `expected`.
    bbtest_basics! {
        no_alter: ("hello", "hello");
        lt_single: ("h<ello", "h&lt;ello");
        gt_single: ("h>ello", "h&gt;ello");
        amp_single: ("h&ello", "h&amp;ello");
        quote_single: ("h'ello", "h&#x27;ello");
        doublequote_single: ("h\"ello", "h&quot;ello");
        return_byebye: ("h\rello", "hello");
        newline_br: ("h\nello", "h<br>ello");
        complex_escape: (
            "it's a %CRAZY% <world> ๐Ÿ’™=\"yeah\" ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ>>done", 
            "it&#x27;s a %CRAZY% &lt;world&gt; ๐Ÿ’™=&quot;yeah&quot; ๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ&gt;&gt;done"
        );
        //"Simple" means there are no complicated tag structures, or only a single tag (most common)
        simple_bold: ("[b]hello[/b]", "<b>hello</b>");
        simple_sup: ("[sup]hello[/sup]", "<sup>hello</sup>");
        simple_sub: ("[sub]hello[/sub]", "<sub>hello</sub>");
        simple_strikethrough: ("[s]hello[/s]", "<s>hello</s>");
        simple_underline: ("[u]hello[/u]", "<u>hello</u>");
        simple_italic: ("[i]hello[/i]", "<i>hello</i>");
        simple_nospaces: ("[b ]hello[/ b]", "[b ]hello[/ b]");
        //The matches are returned lowercase from regex when insensitive
        simple_insensitive: ("[sUp]hello[/SuP]", "<sup>hello</sup>");
        simple_sensitivevalue: ("[sUp]OK but The CAPITALS[/SuP]YEA", "<sup>OK but The CAPITALS</sup>YEA");
        simple_bolditalic: ("[b][i]hello[/i][/b]", "<b><i>hello</i></b>");
        nested_bold: ("[b]hey[b]extra bold[/b] less bold again[/b]", "<b>hey<b>extra bold</b> less bold again</b>");
        simple_url_default: ("[url]https://google.com[/url]", r#"<a href="https://google.com" target="_blank">https://google.com</a>"#);
        simple_url_witharg: ("[url=http://ha4l6o7op9dy.com]furries lol[/url]", r#"<a href="http://ha4l6o7op9dy.com" target="_blank">furries lol</a>"#);
        url_escape: ("[url=http'://ha4l<6o7op9dy>.com]furries lol[/url]", r#"<a href="http&#x27;://ha4l&lt;6o7op9dy&gt;.com" target="_blank">furries lol</a>"#);
        simple_img: ("[img]https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png[/img]", r#"<img src="https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png">"#);
        simple_img_nonstd: ("[img=https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png][/img]", r#"<img src="https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png">"#);
        //NOTE: this one, it's just how I want it to work. IDK how the real bbcode handles this weirdness
        //simple_img_nonstd_inner: ("[img=https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png]abc 123[/img]", r#"<img src="https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png">abc 123"#);
        simple_img_nonstd_inner: ("[img=https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png]abc 123[/img]", r#"<img src="https://old.smiflebosicswoace.com/user_uploads/avatars/t1647374379.png">"#);
        //New in 0.2.0: allow images inside urls (old bbcode matcher I was emulating didn't allow that, but that's stupid)
        url_with_img: ("[url=https://google.com][img]https://some.image.url/junk.png[/img][/url]", r#"<a href="https://google.com" target="_blank"><img src="https://some.image.url/junk.png"></a>"#);
        url_with_img_attr: ("[url=https://google.com][img=https://some.image.url/junk.png][/url]", r#"<a href="https://google.com" target="_blank"><img src="https://some.image.url/junk.png"></a>"#);
        //Note: because url is special and can get the href from either the inside or the attribute, if it has no attribute,
        //it must get it from the inside. So, although the image is a proper tag, it will not be converted in this case
        url_with_img_nourl: ("[url][img=https://some.image.url/junk.png][/url]", r#"<a href="[img=https://some.image.url/junk.png]" target="_blank">[img=https://some.image.url/junk.png]</a>"#);
        url_no_other_tags: ("[url=https://what.non][b][i][u][s][/url]", r#"<a href="https://what.non" target="_blank">[b][i][u][s]</a>"#);
        //Note: this is "undefined" behavior, and it makes sense that an ignored tag (the inner [url]) would not be linked to its immediate
        //closing tag, and so the first [/url] closes the url early. "fixing" this edge case is beyond the scope of this library, as it's 
        //unsupported behavior anyway.
        url_nested: ("[url=https://what.non][url=https://abc123.com][/url][/url]", r#"<a href="https://what.non" target="_blank">[url=https://abc123.com]</a>"#);
        //This also tests auto-closed tags, albeit a simple form
        list_basic:  ("[list][*]item 1[/*][*]item 2[/*][*]list[/*][/list]", "<ul><li>item 1</li><li>item 2</li><li>list</li></ul>");
        unclosed_basic: ("[b] this is bold [i]also italic[/b] oops close all[/i]", "<b> this is bold <i>also italic</i></b> oops close all");
        verbatim_url: ("[url]this[b]is[/b]a no-no[i][/url]", r#"<a href="this[b]is[/b]a no-no[i]" target="_blank">this[b]is[/b]a no-no[i]</a>"#);
        inner_hack: ("[[b][/b]b]love[/[b][/b]b]", "[<b></b>b]love[/<b></b>b]");
        random_brackets: ("[][[][6][a[ab]c[i]italic[but][][* not] 8[]]][", "[][[][6][a[ab]c<i>italic[but][][* not] 8[]]][</i>");
        autolink_basic: ("this is https://google.com ok?", r#"this is <a href="https://google.com" target="_blank">https://google.com</a> ok?"#);

        newline_list1: ("[list]\n[*]item", "<ul><li>item</li></ul>");
        newline_list2: ("[list]\r\n[*]item", "<ul><li>item</li></ul>");
        newline_listmega: ("\n[list]\r\n[*]item\r\n[*]item2 yeah[\r\n\r\n[*]three", "<br><ul><li>item</li><li>item2 yeah[<br></li><li>three</li></ul>");
        //Bold, italic, etc should not remove newlines anywhere
        newline_bold: ("\n[b]\nhellow\n[/b]\n", "<br><b><br>hellow<br></b><br>");
        newline_italic: ("\n[i]\nhellow\n[/i]\n", "<br><i><br>hellow<br></i><br>");
        newline_underline: ("\n[u]\nhellow\n[/u]\n", "<br><u><br>hellow<br></u><br>");
        newline_strikethrough: ("\n[s]\nhellow\n[/s]\n", "<br><s><br>hellow<br></s><br>");
        newline_sup: ("\n[sup]\nhellow\n[/sup]\n", "<br><sup><br>hellow<br></sup><br>");
        newline_sub: ("\n[sub]\nhellow\n[/sub]\n", "<br><sub><br>hellow<br></sub><br>");
        consume_attribute: ("[b=haha ok]but maybe? [/b]{no}", "<b>but maybe? </b>{no}");

        ////Nicole's bbcode edge cases
        e_dangling: ("[b]foo", "<b>foo</b>");
        e_normal: ("[b]foo[/b]", "<b>foo</b>");
        e_nested: ("[b]foo[b]bar[/b][/b]", "<b>foo<b>bar</b></b>");
        e_empty: ("[b]foo[b][/b]bar[/b]", "<b>foo<b></b>bar</b>");
        e_closemulti: ("[b]foo[i]bar[u]baz[/b]quux", "<b>foo<i>bar<u>baz</u></i></b>quux");
        e_faketag: ("[b]foo[i]bar[u]baz[/fake]quux", "<b>foo<i>bar<u>baz[/fake]quux</u></i></b>");
        e_reallyfake: ("[fake][b]foo[i]bar[u]baz[/fake]quux", "[fake]<b>foo<i>bar<u>baz[/fake]quux</u></i></b>");
        e_ignoreclose: ("[b]foo[/b]bar[/b][/b][/b]", "<b>foo</b>bar");
        e_weirdignoreclose: ("[b]foo[/b]bar[/fake][/b][/fake]", "<b>foo</b>bar[/fake][/fake]");
        e_fancytag: ("[[i]b[/i]]", "[<i>b</i>]");
        e_escapemadness: ("&[&]<[<]>[>]", "&amp;[&amp;]&lt;[&lt;]&gt;[&gt;]");
        e_bracket_url: ("[url=#Ports][1][/url]", r##"<a href="#Ports" target="_blank">[1]</a>"##);
    }
936
    //Cases for a non-default configuration. Every entry has the form
    //`test_name: (input, expected);` and is expanded by bbtest_nondefaults! into its own
    //#[test]. That macro configures the parser with no link target, no images inside urls,
    //no newline-to-<br> conversion, and only the b/u/code/url/img tags accepted.
    bbtest_nondefaults! {
        restricted_tags: ("[s]not supported haha![/s]", "[s]not supported haha![/s]");
        newlines_not_br: ("[b]this\n[i]\nis\n[u]silly!\n[/s]\n", "<b>this\n[i]\nis\n<u>silly!\n[/s]\n</u></b>");
        no_target_in_url: ("[url=https://valid.com]target[/url]", "<a href=\"https://valid.com\" >target</a>");
        //NOTE: this also tests that there's no target, and that random space in the a tag sucks
        no_img_in_url: ("[url=https://valid.com][img=https://notvalid.net][/url]", "<a href=\"https://valid.com\" >[img=https://notvalid.net]</a>");
        no_img_in_url_noendtag: ("[url=https://valid.com][img=https://notvalid.net][/img]", "<a href=\"https://valid.com\" >[img=https://notvalid.net][/img]</a>");
    }
945
    //Cases for the extended tag set. Every entry has the form `test_name: (input, expected);`
    //and is expanded by bbtest_extras! into its own #[test]. That macro builds the parser from
    //BBCodeTagConfig::extended() plus a custom "color" tag, which the last two cases exercise.
    bbtest_extras! {
        e_emptyquote: ("[quote]...[/quote]", "<blockquote>...</blockquote>");
        e_normalquote: ("[quote=foo]...[/quote]", r#"<blockquote cite="foo">...</blockquote>"#);
        simple_spoiler: ("[spoiler=wow]amazing[/spoiler]", r#"<details class="spoiler"><summary>wow</summary>amazing</details>"#);
        simple_emptyspoiler: ("[spoiler]this[b]is empty[/spoiler]", r#"<details class="spoiler"><summary>Spoiler</summary>this<b>is empty</b></details>"#);
        spoiler_simeon: ("[spoiler spoiler=what is this]i hate it[/spoiler]", r#"<details class="spoiler"><summary>what is this</summary>i hate it</details>"#);
        cite_escape: ("[quote=it's<mad>lad]yeah[/quote]",r#"<blockquote cite="it&#x27;s&lt;mad&gt;lad">yeah</blockquote>"#);
        h1_simple: ("[h1] so about that header [/h1]", "<h1> so about that header </h1>");
        h2_simple: (" [h2]Not as important", " <h2>Not as important</h2>");
        h3_simple: ("[h3][h3]wHaAt-Are-u-doin[/h3]", "<h3><h3>wHaAt-Are-u-doin</h3></h3>");
        quote_newlines: ("\n[quote]\n\nthere once was\na boy\n[/quote]\n", "<br><blockquote><br>there once was<br>a boy<br></blockquote>");
        anchor_simple: ("[anchor=Look_Here]The Title[/anchor]", r##"<a name="Look_Here" href="#Look_Here">The Title</a>"##);
        anchor_inside: ("[anchor=name][h1]A title[/h1][/anchor]", r##"<a name="name" href="#name"><h1>A title</h1></a>"##);
        icode_simple: ("[icode=Nothing Yet]Some[b]code[url][/i][/icode]", r#"<span class="icode">Some[b]code[url][/i]</span>"#);
        code_simple: ("\n[code=SB3]\nSome[b]code[url][/i]\n[/code]\n", "<br><pre class=\"code\" data-code=\"SB3\">Some[b]code[url][/i]\n</pre>");
        simple_customtag: ("[color=wow]amazing[/color]", r#"<span style="color:wow">amazing</span>"#);
        simple_customtag_withdefault: ("[color=#FF5500][b][i]ama\nzing[/color]", r#"<span style="color:#FF5500"><b><i>ama<br>zing</i></b></span>"#);
    }
964
    //Cases for the "consumer" mode. Every entry has the form `test_name: (input, expected);`
    //and is expanded by bbtest_consumer! into its own #[test], which calls to_consumer() on an
    //extended-config parser before parsing. The expected outputs here contain no markup:
    //the tags are gone while plain text is still escaped.
    bbtest_consumer! {
        consume_standard: ("[b]wow[/b] but like [i]uh no scoping [s] rules [/sup] and ugh[/quote]", "wow but like uh no scoping  rules  and ugh");
        //Remember, regex is still the same, so that "code" tag still consumes whitespace
        consume_stillescape: ("<>'\"oof[img=wow][url][code][/url][/code]\n", "&lt;&gt;&#x27;&quot;oof");
    }
/* These cases reflect limitations of the old parser; I don't want to include them as tests:
971[quote=foo=bar]...[/quote]
972<blockquote>...</blockquote>
973
974[quote=[foo]]...[/quote]
975[quote=[foo]]...
976*/
977
978}