rustextile/
parser.rs

1use std::collections::{BTreeMap, HashMap};
2use std::borrow::Cow;
3use std::io::Read;
4use std::time::{SystemTime, UNIX_EPOCH};
5use std::collections::hash_map::DefaultHasher;
6use std::hash::Hasher;
7
8use indexmap::IndexMap;
9use lazy_static::lazy_static;
10use fancy_regex::{Regex, Captures, Replacer, Match};
11
12use crate::charcounter::CharCounter;
13use crate::regextra::{split_with_capture, fregex, multi_replace, multi_replace_with_one, unwrap_or_empty};
14use crate::htmltools::{generate_tag, encode_html, join_html_attributes, unescape, has_raw_text, reverse_encode_html};
15use crate::table::{process_table, TABLE_SPAN_RE_S};
16use crate::urlutils::{UrlBits, UrlString};
17use crate::block::{Block, BlockAttributes, BlockHtmlAttributes};
18use crate::regex_snips::{
19    CLS_RE_S, ALIGN_RE_S, SNIP_ACR, SNIP_ABR, SNIP_SPACE, SNIP_DIGIT,
20    SNIP_WRD, SNIP_CUR, SNIP_CHAR, LONE_AMP_RE, PNCT_RE_S, DIVIDER_RE};
21
/// Symbol characters accepted as a note-list start marker. A back-reference
/// label that starts from one of these is repeated rather than incremented
/// for each reference.
const SYMS_RE_S: &str = "¤§µ¶†‡•∗∴◊♠♣♥♦";
// https://www.unicode.org/reports/tr44/#GC_Values_Table
/// Regex alternation of block signatures.
/// NOTE(review): not referenced in the visible part of this file — presumably
/// consumed by the block parser; confirm.
const BLOCK_TAGS_RE_S: &str = r"bq|bc|notextile|pre|h[1-6]|fn\d+|p|###";
/// Reduced alternation of block signatures.
/// NOTE(review): presumably the "lite" mode counterpart of `BLOCK_TAGS_RE_S`;
/// usage is outside the visible part of this file.
const BLOCK_TAGS_LITE_RE_S: &str = "bq|bc|p";
/// URL schemes accepted for link references when the parser is restricted.
const RESTRICTED_URL_SCHEMES: [&str; 4] = ["http", "https", "ftp", "mailto"];
/// URL schemes accepted for link references when the parser is not restricted.
const UNRESTRICTED_URL_SCHEMES: [&str; 9] = ["http", "https", "ftp", "mailto", "file", "tel", "callto", "sftp", "data"];
28
29fn span_re(tag: &str) -> Regex {
30    const PNCT: &str = r#".,"'?!;:‹›«»„“”‚‘’"#;
31    fregex!(
32        &format!(
33            concat!(
34                r"(?P<pre>^|(?<=[\s>{pnct}\(])|[{{\[])",
35                r"(?P<tag>{tag})(?!{tag})",
36                r"(?P<atts>{cls})",
37                r"(?!{tag})",
38                r"(?::(?P<cite>\S+[^{tag}]{space}))?",
39                r"(?P<content>[^{space}{tag}]+|\S.*?[^\s{tag}\n])",
40                r"(?P<end>[{pnct}]*)",
41                r"{tag}",
42                r"(?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))"),
43            tag=tag, cls=*CLS_RE_S, pnct=PNCT, space=SNIP_SPACE))
44}
45
46fn do_special<'t, R>(text: &'t str, start: &str, end: &str, method: R) -> Cow<'t, str>
47    where R: Replacer
48{
49    let pattern = Regex::new(
50        &format!(r"(?ms)(^|\s|[\[({{>|]){0}(.*?){1}($|[\])}}])?",
51                fancy_regex::escape(start),
52                fancy_regex::escape(end)))
53        .expect("A valid expression");
54
55    pattern.replace_all(text, method)
56}
57
58fn get_image_size(url: &str) -> Option<(i64, i64)> {
59    const MAX_IMAGE_CHUNK: usize = 1024;
60    let mut buffer = [0u8; MAX_IMAGE_CHUNK];
61    if let Ok(mut response) = reqwest::blocking::get(url) {
62        let mut read_total: usize = 0;
63        loop {
64            let read_result = response.read(&mut buffer[read_total..]);
65            match read_result {
66                Ok(bytes_fetched) => {
67                    if bytes_fetched == 0 { break; }
68                    read_total += bytes_fetched;
69                    if let Ok(info) = imageinfo::ImageInfo::from_raw_data(&buffer[..read_total]) {
70                        return Some((info.size.width, info.size.height));
71                    }
72                },
73                Err(_) => {
74                    return None;
75                },
76            }
77        }
78    }
79    None
80}
81
/// Builds the ordered table of 22 `(pattern, replacement)` pairs that turn
/// plain-text punctuation into HTML entities (quotes, dashes, ellipsis,
/// trademark-like symbols, fractions, ...).
///
/// The order of the entries is significant: the specific quote rules come
/// before the catch-all `'` and `"` rules. `is_html5` only affects the last
/// entry, which wraps an uppercase acronym followed by a parenthesised
/// expansion in `<abbr>` (HTML5) or `<acronym>` (otherwise).
///
/// Every call compiles all the regexes again.
fn make_glyph_replacers(is_html5: bool) -> [(Regex, &'static str); 22] {
    lazy_static! {
        // Optional currency sign followed by any amount of spacing.
        static ref CUR: String = format!(
            r"(?:[{0}]{1}*)?", SNIP_CUR, SNIP_SPACE);
    }
    [
        // dimension sign
        (fregex!(
            &format!(
                concat!(r#"(?i)(?<=\b|x)([0-9]+[\])]?['"]? ?)[x]( ?[\[(]?)"#,
                        r"(?=[+-]?{0}[0-9]*\.?[0-9]+)"),
                *CUR)),
         r"$1&#215;$2"),
        // apostrophe's
        (fregex!(&format!(r"({0}|\))'({0})", SNIP_WRD)),
         r"$1&#8217;$2"),
        // back in '88
        (fregex!(&format!(r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')", SNIP_SPACE, SNIP_WRD)),
         r"$1&#8217;$2"),
        // single opening following an open bracket.
        (fregex!(r"([(\[{])'(?=\S)"), r"$1&#8216;"),
        // single closing
        (fregex!(&format!(r"(\S)'(?={0}|{1}|<|$)", SNIP_SPACE, PNCT_RE_S)),
         r"$1&#8217;"),
        // single opening
        (fregex!(r"'"), r"&#8216;"),
        // double opening following an open bracket. Allows things like
        // Hello ["(Mum) & dad"]
        (fregex!(r#"([(\[{])"(?=\S)"#), r"$1&#8220;"),
        // double closing
        (fregex!(&format!(r#"(\S)"(?={0}|{1}|<|$)"#, SNIP_SPACE, PNCT_RE_S)),
         r"$1&#8221;"),
        // double opening
        (fregex!(r#"""#), r"&#8220;"),
        // ellipsis
        (fregex!(r"([^.]?)\.{3}"), r"$1&#8230;"),
        // ampersand
        (fregex!(r"(\s?)&(\s)"), r"$1&amp;$2"),
        // em dash
        (fregex!(r"(\s?)--(\s?)"), r"$1&#8212;$2"),
        // en dash
        (fregex!(r" - "), r" &#8211; "),
        // trademark
        (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]TM[\])]", SNIP_SPACE)),
         r"$1&#8482;"),
        // registered
        (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]R[\])]", SNIP_SPACE)),
         r"$1&#174;"),
        // copyright
        (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]C[\])]", SNIP_SPACE)),
         r"$1&#169;"),
        // 1/2
        (fregex!(r"[(\[]1\/2[\])]"), r"&#189;"),
        // 1/4
        (fregex!(r"[(\[]1\/4[\])]"), r"&#188;"),
        // 3/4
        (fregex!(r"[(\[]3\/4[\])]"), r"&#190;"),
        // degrees
        (fregex!(r"[(\[]o[\])]"), r"&#176;"),
        // plus/minus
        (fregex!(r"[(\[]\+\/-[\])]"), r"&#177;"),
        // 3+ uppercase acronym
        (fregex!(&format!(r"\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])", SNIP_ABR, SNIP_ACR)),
         if is_html5 {r#"<abbr title="$2">$1</abbr>"#} else {r#"<acronym title="$2">$1</acronym>"#}),
    ]
}
148
/// Everything the parser knows about a single endnote.
#[derive(Clone, Debug)]
pub(crate) struct NoteInfo {
    /// Anchor id of the note, built from the configured link prefix and a
    /// running link index.
    pub id: String,
    //attrs: Option<Attrs>,
    /// Rendered body of the note; `None` until its definition has been parsed.
    pub content: Option<String>,
    /// Per-note backlink style (`"!"`, `"^"` or anything else); when `None`
    /// the style given on the note list is used instead.
    pub link: Option<String>,
    /// Pre-rendered HTML attribute string emitted on the note's `<li>`.
    pub attrs: Option<String>,
    /// Sequence key of a referenced note. After the notes are re-keyed for
    /// placement this holds the note's label instead.
    pub seq: Option<String>,
    /// Ids of the references that point at this note, used to build the
    /// `#noteref…` backlinks.
    pub refids: Vec<String>,
}
159
160
/// Drops the surrounding wrapper of a special span.
///
/// When `pre` and `tail` together form one of the known wrapper pairs
/// (currently only `[` … `]`), both are replaced by empty strings; otherwise
/// they are returned untouched.
fn get_special_options<'a,'b>(pre: &'a str, tail: &'b str) -> (&'a str, &'b str) {
    const SPAN_WRAPPERS: [(&str, &str); 1] = [
        ("[", "]"),
    ];
    let is_wrapped = SPAN_WRAPPERS
        .iter()
        .any(|&(opening, closing)| pre == opening && tail == closing);
    if is_wrapped {
        ("", "")
    } else {
        (pre, tail)
    }
}
172
/// Strips the scheme from `url` for display purposes.
///
/// Everything up to and including the first `://` is removed; if there is no
/// `://`, everything up to and including the first `:` is removed; a URL
/// with neither is returned unchanged.
fn make_url_readable(url: &str) -> &str {
    url.split_once("://")
        .or_else(|| url.split_once(':'))
        .map_or(url, |(_scheme, remainder)| remainder)
}
181
/// Mutable state accumulated while one document is being converted.
pub(crate) struct ParserState<'t> {
    /// Endnotes keyed by label; re-keyed by sequence once note lists are placed.
    pub notes: BTreeMap<String, NoteInfo>,
    /// NOTE(review): not touched in the visible part of this file — presumably
    /// footnote ids keyed by footnote number; confirm.
    pub footnotes: IndexMap<String, String>,
    /// Shelved fragments keyed by their placeholder token.
    shelf: IndexMap<String, String>,
    /// Named URL references (`[name]url`) collected from the input.
    urlrefs: IndexMap<String, UrlString<'t>>,
    /// NOTE(review): starts at 1; its use is outside the visible part of this file.
    note_index: u32,
    /// Running counter used to build note ids.
    link_index: u32,
    /// Running counter shared by the shelve and URL placeholder tokens.
    ref_index: u32,
    /// NOTE(review): presumably the current nesting depth of inline spans; confirm.
    span_depth: u32,
    /// HTML-escaped URLs keyed by the `ref_index` of their placeholder.
    ref_cache: IndexMap<u32, String>,
    /// Configuration of the converter this state belongs to.
    pub textile: &'t Textile,
    /// NOTE(review): presumably remembered start numbers of ordered lists; confirm.
    ol_starts: IndexMap<String, usize>,
    /// Notes that were defined but never referenced, keyed by label.
    unreferenced_notes: BTreeMap<String, NoteInfo>,
    /// Rendered note-list items keyed by backlink style, extras flag and start character.
    notelist_cache: IndexMap<String, String>,
}
197
198
199impl <'t> ParserState<'t> {
200    fn new(textile: &'t Textile) -> Self {
201        Self {
202            textile,
203            notes: Default::default(),
204            footnotes: Default::default(),
205            shelf: Default::default(),
206            urlrefs: Default::default(),
207            note_index: 1,
208            link_index: 0,
209            ref_index: 0,
210            span_depth: 0,
211            ol_starts: Default::default(),
212            ref_cache: Default::default(),
213            notelist_cache: Default::default(),
214            unreferenced_notes: Default::default(),
215        }
216    }
217
218    pub fn increment_link_index(&mut self) -> u32 {
219        self.link_index += 1;
220        self.link_index
221    }
222
223    /// Parses the note definitions and formats them as HTML
224    pub fn parse_note_defs(&mut self, m: &Captures) -> &'static str {
225        let label = &m["label"];
226        let link = &m["link"];
227        let att = &m["att"];
228        let content = &m["content"];
229
230        // Assign an id if the note reference parse hasn't found the label yet.
231        if !self.notes.contains_key(label) {
232            let new_index = self.increment_link_index();
233            self.notes.insert(
234                label.to_owned(),
235                NoteInfo {
236                    id: format!(
237                        "{0}{1}",
238                        self.textile.link_prefix,
239                        new_index),
240                    content: None,
241                    link: None,
242                    attrs: None,
243                    seq: None,
244                    refids: Default::default(),
245                });
246
247        }
248        // Ignores subs
249        if self.notes.contains_key(label) {
250            let note_content = self.graf(content).into_owned();
251            if let Some(mut note) = self.notes.get_mut(label) {
252                if note.link.is_none() {
253                    note.link = if link.is_empty() { None } else { Some(link.into()) };
254                    note.attrs = Some(
255                        BlockAttributes
256                            ::parse(att, None, true, self.textile.restricted)
257                            .into());
258                    note.content = Some(note_content);
259                }
260            }
261        }
262
263        ""
264    }
265    /// Given the pieces of a back reference link, create an <a> tag.
266    fn make_back_ref_link(info: &NoteInfo, g_links: &str, i: char) -> Cow<'t, str> {
267        fn char_code_to_entity(c: u32) -> String {
268            let entity = format!("&#{};", c);
269            unescape(&entity).into_owned()
270        }
271
272        let backlink_type = match info.link {
273            Some(ref link) => link.as_str(),
274            None => g_links,
275        };
276        let allow_inc = !SYMS_RE_S.contains(i);
277        let mut i_ = i as u32;
278
279        match backlink_type {
280            "!" => Cow::Borrowed(""),
281            "^" => {
282                if !info.refids.is_empty() {
283                    Cow::Owned(format!("<sup><a href=\"#noteref{0}\">{1}</a></sup>",
284                                       info.refids[0], char_code_to_entity(i_)))
285                } else {
286                    Cow::Borrowed("")
287                }
288            },
289            _ => {
290                let mut result = String::new();
291                for refid in info.refids.iter() {
292                    let sup = format!(
293                        "<sup><a href=\"#noteref{0}\">{1}</a></sup>",
294                        refid, char_code_to_entity(i_));
295                    if allow_inc {
296                        i_ += 1;
297                    }
298                    if !result.is_empty() {
299                        result.push(' ');
300                    }
301                    result.push_str(&sup);
302                }
303                Cow::Owned(result)
304            }
305        }
306    }
307
308    /// Parse the text for endnotes
309    fn place_note_lists<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
310        if !self.notes.is_empty() {
311            let mut o = BTreeMap::<String, NoteInfo>::new();
312            for (label, info) in self.notes.iter() {
313                let mut info_clone = info.clone();
314                if let Some(ref i) = info.seq {
315                    info_clone.seq = Some(label.clone());
316                    o.insert(i.clone(), info_clone);
317                } else {
318                    self.unreferenced_notes.insert(label.clone(), info_clone);
319                }
320            }
321            self.notes = o;
322        }
323        lazy_static! {
324            static ref TEXT_RE: Regex = fregex!(
325                &format!(
326                    r"<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)\.?[\s]*</p>",
327                    *CLS_RE_S, SYMS_RE_S));
328        }
329        // Given the text that matches as a note, format it into HTML.
330        let f_note_lists = |cap: &Captures| -> String {
331            let (att, g_links, extras) = (&cap[1], &cap[3], &cap[4]);
332
333            let start_char = match cap.get(2) {
334                Some(m) => m.as_str().chars().next().expect("Not empty"),
335                None => 'a'
336            };
337            let index = format!("{0}{1}{2}", g_links, extras, start_char);
338            let mut result = String::new();
339
340            if !self.notelist_cache.contains_key(&index) {
341                let mut o = Vec::<String>::new();
342                if !self.notes.is_empty() {
343                    for (_seq, info) in self.notes.iter() {
344                        let links = Self::make_back_ref_link(info, g_links, start_char);
345                        let li = if let NoteInfo {
346                            id: ref infoid,
347                            attrs: Some(ref atts),
348                            content: Some(ref content),
349                            ..
350                        } = *info {
351                            format!("\t\t<li{0}>{1}<span id=\"note{2}\"> </span>{3}</li>",
352                                    atts, links, infoid, content)
353                        } else {
354                            format!("\t\t<li>{0} Undefined Note [#{1}].</li>",
355                                    links, info.seq.as_deref().unwrap_or_default())
356                        };
357                        o.push(li);
358                    }
359                }
360                if extras == "+" && !self.unreferenced_notes.is_empty() {
361                    for info in self.unreferenced_notes.values() {
362                        let atts = info.attrs.as_deref().unwrap_or_default();
363                        let content = info.content.as_deref().unwrap_or_default();
364                        o.push(format!("\t\t<li{0}>{1}</li>", atts, content));
365                    }
366                }
367                result = o.join("\n");
368                self.notelist_cache.insert(index, result.clone());
369            }
370            if result.is_empty() {
371                result
372            } else {
373                let list_atts: String = BlockAttributes
374                    ::parse(att, None, true, self.textile.restricted)
375                    .into();
376                format!("<ol{0}>\n{1}\n\t</ol>", list_atts, result)
377            }
378        };
379        TEXT_RE.replace_all(text, f_note_lists)
380    }
381
382    pub fn shelve(&mut self, text: String) -> String {
383        self.ref_index += 1;
384        let item_id = format!("{0}{1}:shelve", self.textile.uid, self.ref_index);
385        self.shelf.insert(item_id.clone(), text);
386        item_id
387    }
388
389    pub fn shelve_url(&mut self, text: UrlString) -> String {
390        let escaped_url = text.to_html_string();
391        self.ref_index += 1;
392        self.ref_cache.insert(self.ref_index, escaped_url);
393        format!("{0}{1}{2}", self.textile.uid, self.ref_index, ":url")
394    }
395
396    pub fn retrieve(&self, text: String) -> String {
397        let mut new_text = text;
398        loop {
399            let old = new_text.clone();
400            for (k, v) in self.shelf.iter() {
401                new_text = new_text.replace(k, v);
402            }
403            if new_text == old {
404                break;
405            }
406        }
407        new_text
408    }
409
410    fn retrieve_urls<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
411        let mut regex_cache = self.textile.regex_cache.borrow_mut();
412        let pattern = regex_cache
413            .entry(line!())
414            .or_default()
415            .entry("")
416            .or_insert_with(
417                || fregex!(&format!(r"{0}(?P<token>[0-9]+):url", self.textile.uid)));
418
419        let retrieve_url = |cap: &Captures| -> String {
420            let token = &cap["token"];
421            match token.parse::<u32>() {
422                Ok(key) => {
423                    let url = self.ref_cache.get(&key).cloned().unwrap_or_default();
424                    if url.is_empty() {
425                        url
426                    } else if let Some(rurl) = self.urlrefs.get(&url) {
427                        rurl.to_html_string()
428                    } else {
429                        url
430                    }
431                },
432                Err(_) => {
433                    String::new()
434                },
435            }
436        };
437        pattern.replace_all(text, retrieve_url)
438    }
439
440    fn f_textile(&mut self, cap: &Captures) -> String {
441        let (before, notextile) = (&cap[1], &cap[2]);
442        let after = unwrap_or_empty(cap.get(3));
443        let (before, after) = get_special_options(before, after);
444        String::from(before) + &self.shelve(notextile.to_owned()) + after
445    }
446
447    pub fn no_textile(&mut self, text: &str) -> String {
448        let step1 = do_special(text, "<notextile>", "</notextile>", |cap: &Captures| {Self::f_textile(self, cap)});
449        let step2 = do_special(&step1, "==", "==", |cap: &Captures| {Self::f_textile(self, cap)});
450        step2.into_owned()
451    }
452
453    pub fn code(&mut self, text: &str) -> String {
454        fn f_code(parser: &mut ParserState, cap: &Captures) -> String {
455            let (before, text) = (&cap[1], &cap[2]);
456            let after = unwrap_or_empty(cap.get(3));
457            let (before, after) = get_special_options(before, after);
458            let text = encode_html(text, false, false);
459            String::from(before) + &parser.shelve(format!("<code>{0}</code>", text)) + after
460        }
461
462        fn f_pre(parser: &mut ParserState, cap: &Captures) -> String {
463            let (before, text) = (&cap[1], &cap[2]);
464            let after = unwrap_or_empty(cap.get(3));
465            let (before, after) = get_special_options(before, after);
466            // text needs to be escaped
467            let text = encode_html(text, true, false);
468            String::from(before) + "<pre>" + &parser.shelve(text) + "</pre>" + after
469        }
470
471        let text = do_special(text, "<code>", "</code>", |cap: &Captures| f_code(self, cap));
472        let text = do_special(&text, "@", "@", |cap: &Captures| f_code(self, cap));
473        do_special(&text, "<pre>", "</pre>", |cap: &Captures| f_pre(self, cap)).into_owned()
474    }
475
476    fn get_html_comments<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
477        // Search the string for HTML comments, e.g. <!-- comment text -->
478        do_special(text, "<!--", "-->", |cap: &Captures| -> String {
479            // If self.restricted is True, clean the matched contents of the HTML
480            // comment.  Otherwise, return the comments unchanged.
481            // The original php had an if statement in here regarding restricted mode.
482            // nose reported that this line wasn't covered.  It's correct.  In
483            // restricted mode, the html comment tags have already been converted to
484            // &lt;!*#8212; and &#8212;&gt; so they don't match in getHTMLComments,
485            // and never arrive here.
486            let (before, comment_text) = (&cap[1], &cap[2]);
487            format!("{0}<!--{1}-->", before, self.shelve(comment_text.to_owned()))
488        })
489    }
490
491    /// Assuming that in the restricted mode all input was html-encoded
492    /// prior to any real parsing, we need to undo the encoding in order to
493    /// handle the links properly (once the normalization is done they will
494    /// be html-encoded again anyway).
495    pub(crate) fn unrestrict_url<'u>(&self, url: &'u str) -> Cow<'u, str> {
496        if self.textile.restricted {
497            reverse_encode_html(url)
498        } else {
499            url.into()
500        }
501    }
502
503    /// Capture and store URL references in `self.urlrefs`.
504    fn get_refs<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
505        fn make_url_ref_re(schemes: &[&str]) -> Regex {
506            fregex!(
507                &format!(
508                    r"(?:(?<=^)|(?<=\s))\[(.+)\]((?:{0}:\/\/|\/)\S+)(?=\s|$)",
509                    schemes.join("|")))
510        }
511        lazy_static! {
512            static ref RESTRICTED_URLREF_RE: Regex = make_url_ref_re(&RESTRICTED_URL_SCHEMES[..]);
513            static ref UNRESTRICTED_URLREF_RE: Regex = make_url_ref_re(&UNRESTRICTED_URL_SCHEMES[..]);
514        }
515        let urlref_re: &Regex = if self.textile.restricted {
516            &RESTRICTED_URLREF_RE
517        } else {
518            &UNRESTRICTED_URLREF_RE
519        };
520        urlref_re.replace_all(text, |cap: &Captures| -> &str {
521            let flag = &cap[1];
522            let url = self.unrestrict_url(&cap[2]).into_owned();
523            self.urlrefs.insert(
524                flag.to_string(),
525                url.into());
526            ""
527        })
528    }
529
530
531    fn image<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
532        lazy_static! {
533            static ref PATTERN: Regex = fregex!(
534                &format!(
535                    concat!(
536                        r"(?:[\[{{])?",               // pre
537                        r"\!",                        // opening !
538                        r"([<>=]|&lt;|&gt;)?",        // optional alignment atts
539                        r"({0})",                     // optional style,class atts
540                        r"(?:\.\s)?",                 // optional dot-space
541                        r"([^\s(!]+)",                // presume this is the src
542                        r"\s?",                       // optional space
543                        r"(?:\(([^\)]+)\))?",         // optional title
544                        r"\!",                        // closing
545                        r"(?::(\S+)(?<![\]).,]))?",   // optional href
546                        r"(?:[\]}}]|(?=[.,\s)|]|$))", // lookahead: space or end of string
547                    ),
548                    *CLS_RE_S));
549        }
550        let f_image = |cap: &Captures| -> String {
551            let url = &cap[3];
552            if !self.is_valid_url(url) {
553                return cap[0].to_owned();
554            }
555            let mut atts = if let Some(attributes) = cap.get(2) {
556                BlockAttributes::parse(attributes.as_str(), None, true, self.textile.restricted).html_attrs()
557            } else {
558                BlockHtmlAttributes::default()
559            };
560
561
562            if let Some(align) = cap.get(1) {
563                let alignment = match align.as_str() {
564                    "<" | "&lt;" => "left",
565                             "=" => "center",
566                    ">" | "&gt;" => "right",
567                    _ => unreachable!("Not allowed by regex")
568                };
569                let use_align_class = match self.textile.align_class_enabled {
570                    Some(v) => v,
571                    None => match self.textile.html_type {
572                        HtmlKind::XHTML => false,
573                        HtmlKind::HTML5 => true,
574                    }
575                };
576                if use_align_class {
577                    atts.insert_css_class(format!("align-{}", alignment));
578                } else {
579                    atts.insert("align", alignment.to_owned());
580                }
581            }
582
583            let optional_title = cap.get(4).map(|m| m.as_str());
584            atts.insert("alt", optional_title.unwrap_or_default().to_owned());
585
586            if !UrlBits::parse(url).is_relative() && self.textile.get_sizes {
587                if let Some((width, height)) = get_image_size(url) {
588                    atts.insert("height", height.to_string());
589                    atts.insert("width", width.to_string());
590                }
591            };
592            let url_id = self.shelve_url(
593                self.unrestrict_url(url).into());
594            atts.insert("src", url_id);
595
596            if let Some(title) = optional_title {
597                atts.insert("title", title.to_owned());
598            }
599
600            let img = generate_tag("img", None, &atts);
601            let out = if let Some(href) = cap.get(5) {
602                let shelved_href = self.shelve_url(
603                    self.unrestrict_url(href.as_str()).into());
604                if !shelved_href.is_empty() {
605                    generate_tag(
606                        "a",
607                        Some(&img),
608                        &[("href".into(), shelved_href)])
609                } else {
610                    img
611                }
612            } else {
613                img
614            };
615            self.shelve(out)
616        };
617        PATTERN.replace_all(text, f_image)
618    }
619
620
621    fn links(&mut self, text: &str) -> String {
622        let marked_text = self.mark_start_of_links(text);
623        let result = self.replace_links(&marked_text).into_owned();
624        result
625    }
626
627    // Finds and marks the start of well formed links in the input text."""
628    // Slice text on '":<not space>' boundaries. These always occur in
629    // inline links between the link text and the url part and are much more
630    // infrequent than '"' characters so we have less possible links to
631    // process.
632    fn mark_start_of_links(&self, text: &str) -> String {
633        lazy_static! {
634            static ref SLICE_RE: Regex = fregex!(
635                &format!("\":(?={})", SNIP_CHAR));
636        }
637
638        let mut slices: Vec<_> = split_with_capture(&SLICE_RE, text).collect();
639
640        if slices.len() <= 1 {
641            return text.into();
642        }
643        let mut output: Vec<Cow<str>> = Vec::new();
644
645        let last_slice = slices.pop().expect("Verified, not empty");
646        lazy_static! {
647            static ref START_NOSPACE_RE: Regex = fregex!(r"^\S|=$");
648            static ref END_NOSPACE_RE: Regex = fregex!(r"\S$");
649        }
650        for s in slices {
651            // If there is no possible start quote then this slice is not
652            // a link
653            if !s.contains('"') {
654                output.push(Cow::Borrowed(s));
655                continue;
656            }
657            // Cut this slice into possible starting points wherever we find
658                // a '"' character. Any of these parts could represent the start
659            // of the link text - we have to find which one.
660            let mut possible_start_quotes: Vec<_> = s.split('"').collect();
661
662            // Start our search for the start of the link with the closest
663            // prior quote mark.
664            let mut possibility = possible_start_quotes
665                .pop()
666                .expect("checked above, at least one value must be present");
667
668            // Init the balanced count. If this is still zero at the end of
669            // our do loop we'll mark the " that caused it to balance as the
670            // start of the link and move on to the next slice.
671            let mut balanced = 0;
672            let mut linkparts = Vec::<&str>::new();
673            let mut i = 0;
674
675            while balanced != 0 || i == 0 {
676                // Starting at the end, pop off the previous part of the
677                // slice's fragments.
678
679                // Add this part to those parts that make up the link text.
680                linkparts.push(possibility);
681
682                if !possibility.is_empty() {
683                    if START_NOSPACE_RE.find(possibility).unwrap_or(None).is_some() {
684                        balanced -= 1;
685                    }
686                    if END_NOSPACE_RE.find(possibility).unwrap_or(None).is_some() {
687                        balanced += 1;
688                    }
689                    if let Some(p) = possible_start_quotes.pop() {
690                        possibility = p;
691                    }
692                } else {
693                    // If quotes occur next to each other, we get zero
694                    // length strings.  eg. ...""Open the door,
695                    // HAL!"":url...  In this case we count a zero length in
696                    // the last position as a closing quote and others as
697                    // opening quotes.
698                    balanced += if i == 0 { 1 } else { - 1 };
699                    i += 1;
700                    if let Some(p) = possible_start_quotes.pop() {
701                        possibility = p;
702                    } else {
703                        // If out of possible starting segments we back the
704                        // last one from the linkparts array
705                        linkparts.pop();
706                        break;
707                    }
708                    // If the next possibility is empty or ends in a space
709                    // we have a closing ".
710                    if possibility.is_empty() || possibility.ends_with(' ') {
711                        // force search exit
712                        balanced = 0;
713                    }
714                }
715
716                if balanced <= 0 {
717                    possible_start_quotes.push(possibility);
718                    break;
719                }
720            }
721
722            // Rebuild the link's text by reversing the parts and sticking
723            // them back together with quotes.
724            linkparts.reverse();
725            let link_content = linkparts.join("\"");
726            // Rebuild the remaining stuff that goes before the link but
727            // that's already in order.
728            let pre_link = possible_start_quotes.join("\"");
729            // Re-assemble the link starts with a specific marker for the
730            // next regex.
731            let o = format!(
732                "{0}{1}linkStartMarker:\"{2}",
733                pre_link, self.textile.uid, link_content);
734            output.push(Cow::Owned(o));
735        }
736
737
738        // Add the last part back
739        output.push(Cow::Borrowed(last_slice));
740        // Re-assemble the full text with the start and end markers
741        output.join("\":")
742    }
743
744    fn table<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
745        lazy_static! {
746            static ref PATTERN: Regex = fregex!(
747                &format!(
748                    concat!(
749                        r"(?ms)^(?:table(?P<tatts>_?{s}{a}{c})\.",
750                        r"(?P<summary>.*?)\n)?^(?P<rows>{a}{c}\.? ?\|.*\|)",
751                        r"[\s]*\n\n"),
752                    s=*TABLE_SPAN_RE_S,
753                    a=*ALIGN_RE_S,
754                    c=*CLS_RE_S));
755        }
756        let text = format!("{0}\n\n", text);
757        match PATTERN.captures(&text) {
758            Ok(Some(cap)) => process_table(
759                self,
760                unwrap_or_empty(cap.name("tatts")),
761                &cap["rows"],
762                cap.name("summary").map(|m| m.as_str())).into(),
763            _ => text.into()
764        }
765    }
766
767    /// Parse the text for definition lists and send them to be
768    /// formatted.
769    pub(crate) fn redcloth_list<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
770        lazy_static! {
771            static ref PATTERN: Regex = fregex!(
772                &format!(r"(?ms)^([-]+{0}[ .].*:=.*)$(?![^-])", *CLS_RE_S));
773            static ref SPLITTER: Regex = fregex!(
774                r"(?m)\n(?=[-])");
775
776            // parses the attributes and the content
777            static ref ATTR_CONTENT_RE: Regex = fregex!(
778                &format!(r"(?ms)^[-]+({0})\.? (.*)$", *CLS_RE_S));
779            // splits the content into the term and definition
780            static ref XM_RE: Regex = fregex!(
781                &format!(r"(?s)^(.*?){0}*:=(.*?){0}*(=:|:=)?{0}*$",
782                         SNIP_SPACE));
783        }
784
785        let f_rc_list = |cap: &Captures| -> String {
786            let mut out = Vec::<Cow<str>>::new();
787            for line in split_with_capture(&SPLITTER, &cap[0]) {
788                if let Ok(Some(m)) = ATTR_CONTENT_RE.captures(line) {
789                    let atts = &m[1];
790                    let content = m[2].trim();
791                    let html_atts_str: String = BlockAttributes
792                        ::parse(atts, None, true, self.textile.restricted)
793                        .into();
794
795                    let xm_capture = XM_RE.captures(content);
796                    let (term, definition) = if let Ok(Some(ref xm)) = xm_capture {
797                        (xm[1].trim(), xm[2].trim_matches(' '))
798                    } else {
799                        (content, "")
800                    };
801
802                    // if this is the first time through, out as a bool is False
803                    if out.is_empty() {
804                        let dltag = if definition.is_empty() {
805                            format!("<dl{0}>", html_atts_str).into()
806                        } else {
807                            "<dl>".into()
808                        };
809                        out.push(dltag);
810                    }
811
812                    if !term.is_empty() {
813                        let newline_started_def = definition.starts_with('\n');
814                        let mut definition = definition
815                            .trim()
816                            .replace('\n', self.textile.proper_br_tag());
817
818                        if newline_started_def {
819                            definition = format!("<p>{0}</p>", definition);
820                        }
821                        let term = term.replace('\n', self.textile.proper_br_tag());
822
823                        let term = self.graf(&term);
824                        let definition = self.graf(&definition);
825
826                        out.push(format!("\t<dt{0}>{1}</dt>", html_atts_str, term).into());
827                        if !definition.is_empty() {
828                            out.push(format!("\t<dd>{0}</dd>", definition).into());
829                        }
830                    }
831
832                } else {
833                    continue;
834                }
835            }
836            if !out.is_empty() {
837                out.push(Cow::Borrowed("</dl>"));
838            }
839            out.join("\n")
840        };
841
842        PATTERN.replace_all(text, f_rc_list)
843    }
844
845    pub(crate) fn textile_lists<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
846        lazy_static! {
847            static ref PATTERN: Regex = fregex!(
848                &format!(
849                    concat!(r"(?ms)^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$",
850                            r"(?![^#*;:])"),
851                    *CLS_RE_S));
852            static ref SPLITTER: Regex = fregex!(r"(?m)\n(?=[*#;:])");
853            static ref LINE_PARSER: Regex = fregex!(
854                &format!(
855                    concat!(
856                        r"(?s)^(?P<tl>[#*;:]+)(?P<st>_|\d+)?(?P<atts>{0})[ .]",
857                        r"(?P<content>.*)$"),
858                    *CLS_RE_S));
859        }
860        struct ListItem<'t> {
861            atts: &'t str,
862            content: Cow<'t, str>,
863            level: usize,
864            tl: &'t str,
865            st: &'t str,
866        }
867
868        fn list_type(tl: &str) -> &'static str {
869            lazy_static! {
870                static ref START_RE: Regex = fregex!(r"^([#*]+)");
871            }
872            match START_RE.captures(tl) {
873                Ok(Some(m)) => if m[1].ends_with('#') { "ol" } else { "ul" },
874                _ => "dl"
875            }
876        }
877
878        let f_textile_list = |cap: &Captures| -> String {
879            let text = &cap[0];
880            let lines = split_with_capture(&SPLITTER, text);
881            let mut list_items = Vec::<ListItem>::new();
882            for line in lines {
883                if let Ok(Some(m)) = LINE_PARSER.captures(line) {
884                    // A new list item starts here
885                    let tl = unwrap_or_empty(m.name("tl"));
886                    list_items.push(
887                        ListItem {
888                            tl,
889                            atts: unwrap_or_empty(m.name("atts")),
890                            content: unwrap_or_empty(m.name("content")).into(),
891                            level: tl.len(),
892                            st: unwrap_or_empty(m.name("st")),
893                        });
894                } else {
895                    // just a continuation of the previous list item
896                    if let Some(last_item) = list_items.last_mut() {
897                        last_item.content += "\n";
898                        last_item.content += line;
899                    }
900                }
901            }
902            if list_items.is_empty() || list_items[0].level > 1 {
903                return cap[0].to_owned();
904            }
905            let mut prev: Option<&ListItem> = None;
906
907            let mut lists = IndexMap::<&str, usize>::new();
908            let mut out = Vec::<String>::new();
909            let mut litem = "";
910            for (index, item) in list_items.iter().enumerate() {
911                let content = item.content.trim();
912                let ltype = list_type(item.tl);
913                litem = if item.tl.contains(';') {
914                    "dt"
915                } else if item.tl.contains(':') {
916                    "dd"
917                } else {
918                    "li"
919                };
920                let next = list_items.get(index + 1);
921                let show_item = !content.is_empty();
922
923                let mut atts = BlockAttributes
924                    ::parse(item.atts, None, true, self.textile.restricted)
925                    .html_attrs();
926                // let mut start: Option<usize> = None;
927                if ltype == "ol" {
928                    let start_value = self.ol_starts.entry(item.tl.to_string()).or_insert(1);
929                    if prev.map(|p| item.level > p.level).unwrap_or(true) {
930                        if item.st.is_empty() {
931                            *start_value = 1;
932                        } else if item.st != "_" {
933                            if let Ok(int_st) = item.st.parse() {
934                                *start_value = int_st;
935                            }
936                        }
937
938                        if !item.st.is_empty() {
939                            atts.insert("start", start_value.to_string());
940                        }
941                    }
942
943                    if show_item {
944                        *start_value += 1;
945                    }
946                }
947
948                if let Some(p) = prev {
949                    if p.tl.contains(';') && item.tl.contains(':') {
950                        lists.insert(item.tl, 2);
951                    }
952                }
953                let tabs = "\t".repeat(item.level - 1);
954                let mut line = if !lists.contains_key(item.tl) {
955                    lists.insert(item.tl, 1);
956                    if show_item {
957                        format!(
958                            "{0}<{1}{2}>\n{0}\t<{3}>{4}",
959                            tabs, ltype, atts.to_string(),
960                            litem, content)
961                    } else {
962                        format!(
963                            "{0}<{1}{2}>",
964                            tabs, ltype, atts.to_string())
965                    }
966                } else if show_item {
967                    format!(
968                        "{0}\t<{1}{2}>{3}",
969                        tabs, litem, atts.to_string(), content)
970                } else {
971                    String::new()
972                };
973
974                if show_item && next.map(|n| n.level <= item.level).unwrap_or(true) {
975                    line += &format!("</{0}>", litem);
976                }
977
978                for (k, v) in lists.clone().iter().rev() {
979                    let indent = k.len();
980                    if next.map(|n| indent > n.level).unwrap_or(true) {
981                        if *v != 2 {
982                            line += &format!("\n{0}</{1}>", tabs, list_type(k));
983                            if indent > 1 {
984                                line += "</";
985                                line += litem;
986                                line += ">";
987                            }
988                        }
989                        lists.shift_remove(k);
990                    }
991                }
992                prev = Some(item);
993                out.push(line);
994            }
995            let merged_out = out.join("\n");
996            self.do_tag_br(litem, &merged_out).into_owned()
997        };
998
999        PATTERN.replace_all(text, f_textile_list)
1000    }
1001
1002    /// Inserts <br> before each newline within a specified HTML tag,
1003    /// unless it is inappropriate (like when a <br> is already there).
1004    pub(crate) fn do_tag_br<'a>(&mut self, tag: &'static str, input: &'a str) -> Cow<'a, str> {
1005
1006        fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
1007            if a.len() == b.len() {
1008                a.chars().zip(b.chars()).all(|(a_c, b_c)| a_c.eq_ignore_ascii_case(&b_c))
1009            } else {
1010                false
1011            }
1012        }
1013        // Performs an equivalent of
1014        // `replace_all`with a regular expression
1015        // r"(?i)(.+)(?!(?<=</dd>|</dt>|</li>|<br/>)|(?<=<br>)|(?<=<br />))\n(?![\s|])"
1016        // and the replacement "$1\n".
1017        // This is done to avoid panic about BacktrackLimitExceeded
1018        // within fancy_regex::Regex::replace_all, without the need to increase
1019        // the limit via fancy_regex::RegexBuilder::backtrack_limit.
1020        fn insert_brs<'c>(text: &'c str, br: &str) -> Cow<'c, str> {
1021            let num_newlines = text.match_indices('\n').count();
1022            if num_newlines == 0 {
1023                return text.into()
1024            }
1025            // The <br> is not appropriate to insert after the following prefixes
1026            const STOP_PREFIXES: [&str; 6] = ["</dd>", "</dt>", "</li>", "<br/>", "<br>", "<br />"];
1027            let mut output = String::with_capacity(text.len() + num_newlines * br.len());
1028            let lc_text = text.to_lowercase();
1029            let mut next_start = 0;
1030            while let Some(rel_newline_pos) = lc_text[next_start..].find('\n') {
1031                let abs_newline_pos = next_start + rel_newline_pos;
1032                output += &text[next_start..abs_newline_pos];
1033                // Make sure the following characters do not make <br> inappropriate
1034                let is_next_good = !lc_text[abs_newline_pos + 1..]
1035                    .starts_with(|c| char::is_whitespace(c) || c == '|');
1036                if is_next_good {
1037                    // The preceding sequence should also be appropriate for <br>
1038                    let is_prefix_good = !STOP_PREFIXES.iter().any(|p| {
1039                        let prefix_start = abs_newline_pos - p.len().min(abs_newline_pos);
1040                        let prefix = &lc_text[prefix_start..abs_newline_pos];
1041                        eq_ignore_ascii_case(prefix, *p)
1042                    });
1043                    if is_prefix_good {
1044                        output += br;
1045                    }
1046                }
1047                output.push('\n');
1048                next_start = abs_newline_pos + 1;
1049            }
1050            if output.is_empty() {
1051                text.into()
1052            } else {
1053                output += &text[next_start..];
1054                output.into()
1055            }
1056        }
1057
1058        let mut regex_cache = self.textile.regex_cache.borrow_mut();
1059        let pattern = regex_cache
1060            .entry(line!())
1061            .or_default()
1062            .entry(tag)
1063            .or_insert_with(
1064                || fregex!(
1065                    &format!(r"(?s)<{0}([^>]*?)>(.*)</{0}>",
1066                             fancy_regex::escape(tag))));
1067
1068        let br_tag = self.textile.proper_br_tag();
1069        pattern.replace_all(input, |cap: &Captures| -> String {
1070            let content = insert_brs(&cap[2], br_tag);
1071            format!("<{0}{1}>{2}</{0}>", tag, &cap[1], content)
1072        })
1073    }
1074
1075    fn do_p_br<'a>(&mut self, input: &'a str) -> Cow<'a, str> {
1076        lazy_static! {
1077            static ref TAG_RE: Regex = fregex!(r"(?s)<(p|h[1-6])([^>]*?)>(.*)(</\1>)");
1078            static ref BR_RE: Regex = fregex!(
1079                &format!(r"(?i)<br[ ]*/?>{0}*\n(?![{0}|])", SNIP_SPACE));
1080            static ref NEWLINE_RE: Regex = fregex!(r"\n(?![\s|])");
1081        }
1082
1083        let f_do_p_br = |cap: &Captures| -> String {
1084            let text = &cap[3];
1085            let text = BR_RE.replace_all(text, "\n");
1086            let text = NEWLINE_RE.replace_all(
1087                &text,
1088                self.textile.proper_br_tag());
1089            format!("<{0}{1}>{2}{3}", &cap[1], &cap[2], text, &cap[4])
1090        };
1091        TAG_RE.replace_all(input, f_do_p_br)
1092    }
1093
1094
1095    fn footnote_ref<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1096        lazy_static! {
1097            static ref PATTERN: Regex = fregex!(
1098                &format!(
1099                    r"(?<=\S)\[(?P<id>{0}+)(?P<nolink>!?)\](?P<space>{1}?)",
1100                    SNIP_DIGIT,
1101                    SNIP_SPACE));
1102        }
1103
1104        let f_footnote_id = |cap: &Captures| -> String {
1105            let mut fn_att = Vec::<(String, String)>::new();
1106            fn_att.push(("class".to_owned(), "footnote".to_owned()));
1107
1108            let match_id = &cap["id"];
1109            if !self.footnotes.contains_key(match_id) {
1110                let new_index = self.increment_link_index();
1111                let fn_id = format!("{0}{1}", self.textile.link_prefix, new_index);
1112                fn_att.push(("id".to_owned(), format!("fnrev{0}", &fn_id)));
1113                self.footnotes.insert(match_id.to_owned(), fn_id);
1114            }
1115            let fn_id = &self.footnotes[match_id];
1116            let link_tag = generate_tag(
1117                "a",
1118                Some(match_id),
1119                &[("href".to_owned(), format!("#fn{0}", fn_id))]);
1120            let sup_tag = match cap.name("nolink") {
1121                Some(m) if m.as_str() == "!" => {
1122                    generate_tag("sup", Some(match_id), &fn_att)
1123                },
1124                _ => generate_tag("sup", Some(&link_tag), &fn_att)
1125            };
1126            format!("{0}{1}", sup_tag, &cap["space"])
1127        };
1128
1129        PATTERN.replace_all(text, f_footnote_id)
1130    }
1131
1132    /// Search the text looking for note references.
1133    fn note_ref<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1134        lazy_static! {
1135            static ref TEXT_RE: Regex = fregex!(
1136                &format!(
1137                    concat!(
1138                        r"\[",          // start
1139                        r"({0})",      // !atts
1140                        r"\#",
1141                        r"([^\]!]+)",  // !label
1142                        r"([!]?)",     // !nolink
1143                        r"\]"),
1144                    *CLS_RE_S));
1145        }
1146
1147        // Parse and format the matched text into note references.
1148        // By the time this function is called, all the defs will have been
1149        // processed into the notes array. So now we can resolve the link numbers
1150        // in the order we process the refs...
1151        let f_parse_note_refs = |cap: &Captures| -> String {
1152            let (atts, label, nolink) = (&cap[1], &cap[2], &cap[3]);
1153            let html_atts = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1154
1155            // Assign a sequence number to this reference if there isn't one already
1156            let num = if let Some(NoteInfo{seq: Some(num), ..}) = self.notes.get(label) {
1157                num.clone()
1158            } else {
1159                let num = self.note_index.to_string();
1160                self.notes.insert(
1161                    label.to_string(),
1162                    NoteInfo {
1163                        seq: Some(num.clone()),
1164                        id: "".to_owned(),
1165                        refids: Default::default(),
1166                        attrs: None,
1167                        content: None,
1168                        link: None,
1169                    });
1170                self.note_index += 1;
1171                num
1172            };
1173
1174            //  Make our anchor point and stash it for possible use in backlinks when
1175            //  the note list is generated later...
1176            let new_index = self.increment_link_index();
1177            let refid = format!("{0}{1}", self.textile.link_prefix, new_index);
1178            let is_note_id_empty = self.notes[label].id.is_empty();
1179            let new_id: Cow<str> = if is_note_id_empty {
1180                let new_index = self.increment_link_index();
1181                format!("{0}{1}", self.textile.link_prefix, new_index).into()
1182            } else {
1183                "".into()
1184            };
1185            // Build the link (if any)...
1186            let mut result = format!("<span id=\"noteref{0}\">{1}</span>", &refid, num);
1187            if nolink != "!" {
1188                result = format!("<a href=\"#note{0}\">{1}</a>", &new_id, result);
1189            }
1190            self.notes.entry(label.to_owned()).and_modify(|note_ref| {
1191                note_ref.refids.push(refid);
1192                if is_note_id_empty {
1193                    note_ref.id.replace_range(.., &new_id);
1194                }
1195            });
1196            // Build the reference...
1197            generate_tag("sup", Some(&result), &html_atts)
1198        };
1199        TEXT_RE.replace_all(text, f_parse_note_refs)
1200    }
1201
1202
1203
1204    /// Because of the split command, the regular expressions are different for
1205    /// when the text at the beginning and the rest of the text.
1206    /// for example:
1207    /// let's say the raw text provided is "*Here*'s some textile"
1208    /// before it gets to this glyphs method, the text has been converted to
1209    /// "<strong>Here</strong>'s some textile"
1210    /// When run through the split, we end up with ["<strong>", "Here",
1211    /// "</strong>", "'s some textile"].  The re.search that follows tells it
1212    /// not to ignore html tags.
1213    /// If the single quote is the first character on the line, it's an open
1214    /// single quote.  If it's the first character of one of those splits, it's
1215    /// an apostrophe or closed single quote, but the regex will bear that out.
1216    /// A similar situation occurs for double quotes as well.
1217    /// So, for the first pass, we use the glyph_search_initial set of
1218    /// regexes.  For all remaining passes, we use glyph_search
1219    fn glyphs<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1220        lazy_static! {
1221            static ref HTML5_GLYPH_REPLACERS: [(Regex, &'static str); 22] = make_glyph_replacers(true);
1222            static ref XHTML_GLYPH_REPLACERS: [(Regex, &'static str); 22] = make_glyph_replacers(false);
1223            static ref SPLITTER_RE: Regex = fregex!(r"(<[\w\/!?].*?>)");
1224        }
1225
1226        let text = text.trim_end_matches('\n');
1227        let mut result = Vec::new();
1228
1229        let replacers = match self.textile.html_type {
1230            HtmlKind::HTML5 => &HTML5_GLYPH_REPLACERS[..],
1231            HtmlKind::XHTML => &XHTML_GLYPH_REPLACERS[..],
1232        };
1233        // split the text by any angle-bracketed tags
1234        for (i, raw_line) in split_with_capture(&SPLITTER_RE, text).enumerate() {
1235            result.push(
1236                if i % 2 == 0 {
1237                    let raw_line = if !self.textile.restricted {
1238                        Cow::Owned(
1239                            LONE_AMP_RE.replace_all(raw_line, "&amp;")
1240                                  .replace('<', "&lt;")
1241                                  .replace('>', "&gt;"))
1242                    } else {
1243                        Cow::Borrowed(raw_line)
1244                    };
1245                    multi_replace(
1246                        raw_line,
1247                        replacers
1248                            .iter()
1249                            .map(|item| (&item.0, item.1))
1250                            .chain(self.textile.dyn_glyph_replacers.iter()
1251                                   .map(|item| (&item.0, item.1.as_str())))
1252                    ).into()
1253                } else {
1254                    Cow::Borrowed(raw_line)
1255                });
1256        }
1257        result.join("").into()
1258    }
1259
1260    fn replace_links<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1261        /// Replaces links with tokens and stores them on the shelf
1262        const STOPCHARS:&str = r#"\s|^'"*"#;
1263        let mut regex_cache = self.textile.regex_cache.borrow_mut();
1264        let needle = format!("{0}linkStartMarker:", self.textile.uid);
1265        let pattern = regex_cache
1266            .entry(line!())
1267            .or_default()
1268            .entry("")
1269            .or_insert_with(
1270                || fregex!(
1271                    &format!(
1272                        concat!(
1273                            // Optionally open with a square bracket eg. Look ["here":url]
1274                            r"(?P<pre>\[)?",
1275                            // marks start of the link
1276                            "{0}\"",
1277                            // grab the content of the inner "..." part of the link, can be anything but
1278                            // do not worry about matching class, id, lang or title yet
1279                            r"(?P<inner>(?:.|\n)*?)",
1280                            // literal ": marks end of atts + text + title block
1281                            "\":",
1282                            // url upto a stopchar
1283                            r"(?P<urlx>[^{1}]*)"),
1284                        needle, STOPCHARS)));
1285
1286        let mut f_link = |cap: &Captures| -> String {
1287            let in_ = &cap[0];
1288            let mut pre = unwrap_or_empty(cap.get(1)).to_owned();
1289            let inner = cap[2].replace('\n', self.textile.proper_br_tag());
1290            let mut url = &cap[3];
1291            if inner.is_empty() {
1292                return format!(r#"{0}"{1}":{2}"#, pre, inner, url);
1293            }
1294            lazy_static! {
1295                static ref BLOCK_RE: Regex = fregex!(
1296                    &format!(
1297                        concat!(
1298                            r"^",
1299                            r"(?P<atts>{0})", // $atts (if any)
1300                            r"{1}*", // any optional spaces
1301                            r"(?P<text>",  // $text is...
1302                            r"(!.+!)", // an image
1303                            r"|", //  else...
1304                            r".+?", //  link text
1305                            r")", // end of $text
1306                            r"(?:\((?P<title>[^)]+?)\))?",  // $title (if any)
1307                            r"$"),
1308                        *CLS_RE_S, SNIP_SPACE));
1309            }
1310
1311            let (atts, text, title) = if let Ok(Some(m)) = BLOCK_RE.captures(&inner) {
1312                let m_text = unwrap_or_empty(m.name("text"));
1313                (unwrap_or_empty(m.name("atts")),
1314                 if m_text.is_empty() { inner.as_str() } else { m_text },
1315                 unwrap_or_empty(m.name("title")))
1316            } else {
1317                ("", inner.as_str(), "")
1318            };
1319            let mut pop = String::new();
1320            let mut tight = String::new();
1321            let csb_count: usize = url.matches(']').count();
1322            let mut counts = CharCounter::new(['[', ']', '(', ')']);
1323            counts[']'] = Some(csb_count);
1324            // Look for footnotes or other square-bracket delimited stuff at the end
1325            // of the url...
1326            //
1327            // eg. "text":url][otherstuff... will have "[otherstuff" popped back
1328            // out.
1329            //
1330            // "text":url?q[]=x][123]    will have "[123]" popped off the back, the
1331            // remaining closing square brackets will later be tested for balance
1332            if csb_count > 0 {
1333                lazy_static! {
1334                    static ref URL_RE: Regex = fregex!(r"(?P<url>^.*\])(?P<tight>\[.*?)$");
1335                }
1336
1337                if let Ok(Some(m)) = URL_RE.captures(url) {
1338                    url = unwrap_or_empty(m.get(1));
1339                    tight.replace_range(.., &m[2]);
1340                }
1341            }
1342            // Split off any trailing text that isn't part of an array assignment.
1343            // eg. "text":...?q[]=value1&q[]=value2 ... is ok
1344            // "text":...?q[]=value1]following  ... would have "following" popped
1345            // back out and the remaining square bracket will later be tested for
1346            // balance
1347            if csb_count > 0 {
1348                lazy_static! {
1349                    static ref URL_RE: Regex = fregex!(r"(?P<url>^.*\])(?!=)(?P<end>.*?)$");
1350                }
1351                if let Ok(Some(m)) = URL_RE.captures(url) {
1352                    url = unwrap_or_empty(m.name("url"));
1353                    tight = format!("{0}{1}", &m["end"], tight);
1354                }
1355            }
1356
1357            // Now we have the array of all the multi-byte chars in the url we will
1358            // parse the  uri backwards and pop off  any chars that don't belong
1359            // there (like . or , or unmatched brackets of various kinds).
1360            let mut first = true;
1361            let mut url_chars: Vec<_> = url.chars().collect();
1362
1363            loop {
1364                let mut popped = false;
1365                if let Some(c) = url_chars.pop() {
1366                    match c {
1367                        '!' | '?' | ':' | ';' | '.' | ',' => {
1368                            // Textile URL shouldn't end in these characters, we pop them off
1369                            // the end and push them out the back of the url again
1370                            pop.insert(0, c);
1371                            popped = true;
1372                        },
1373                        '>' => {
1374                            let url_left: String = url_chars.iter().collect();
1375
1376                            lazy_static! {
1377                                static ref RE: Regex = fregex!(r"^(?P<url_chars>.*)(?P<tag></[a-z]+)$");
1378                            }
1379                            if let Ok(Some(m)) = RE.captures(&url_left) {
1380                                url_chars.splice(.., m["url_chars"].chars());
1381                                pop = format!("{0}{1}{2}", &m["tag"], c, pop);
1382                                popped = true;
1383                            }
1384                        },
1385                        ']' => {
1386                            // If we find a closing square bracket we are going to see if it is
1387                            // balanced.  If it is balanced with matching opening bracket then it
1388                            // is part of the URL else we spit it back out of the URL."""
1389                            // If counts['['] is None, count the occurrences of '['
1390                            if counts['['].is_none() {
1391                                counts['['] = Some(url.matches('[').count());
1392                            }
1393                            if counts['['] == counts[']'] {
1394                                // It is balanced, so keep it
1395                                url_chars.push(c)
1396                            } else {
1397                                // In the case of un-matched closing square brackets we just eat it
1398                                popped = true;
1399                                counts.dec(']');
1400                                if first {
1401                                    pre.clear();
1402                                }
1403                            }
1404                        },
1405                        ')' => {
1406                            if counts[')'].is_none() {
1407                                counts['('] = Some(url.matches('(').count());
1408                                counts[')'] = Some(url.matches(')').count());
1409                            }
1410
1411                            if counts['('] == counts[')'] {
1412                                url_chars.push(c);
1413                            } else {
1414                                // Unbalanced so spit it out the back end
1415                                pop.insert(0, c);
1416                                counts.dec(')');
1417                                popped = true;
1418                            }
1419                        },
1420                        _ => {
1421                            url_chars.push(c);
1422                        }
1423                    }
1424                }
1425
1426                first = false;
1427                if !popped {
1428                    break;
1429                }
1430            }
1431
1432            let url: String = url_chars.iter().collect();
1433
1434            let url = self.unrestrict_url(&url);
1435            let uri_parts = UrlBits::parse(&url);
1436            let allowed_schemes = if self.textile.restricted {
1437                &RESTRICTED_URL_SCHEMES[..]
1438            } else {
1439                &UNRESTRICTED_URL_SCHEMES[..]
1440            };
1441            let scheme_in_list = allowed_schemes.contains(&(uri_parts.scheme()));
1442            let is_valid_url = uri_parts.scheme().is_empty() || scheme_in_list;
1443            if !is_valid_url {
1444                return in_.replace(&format!("{0}linkStartMarker:", self.textile.uid), "");
1445            }
1446
1447            let text: Cow<str> = if text == "$" {
1448                if scheme_in_list {
1449                    make_url_readable(&url).into()
1450                } else if let Some(rurl) = self.urlrefs.get(url.as_ref()) {
1451                    encode_html(make_url_readable(rurl.source()), true, true).into()
1452                } else {
1453                    url
1454                }
1455            } else {
1456                text.into()
1457            };
1458
1459            let text = text.trim();
1460            let title = encode_html(title, false, false);
1461
1462            let text = if !self.textile.noimage {
1463                self.image(text)
1464            } else {
1465                Cow::Borrowed(text)
1466            };
1467            let text = self.span(&text);
1468            let text = self.glyphs(&text);
1469
1470
1471            let normalized_url = uri_parts.to_string();
1472            let url_id = self.shelve_url(
1473                UrlString::Normalized(normalized_url.into()));
1474            let mut attributes = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1475            attributes.insert("href", url_id);
1476            if !title.is_empty() {
1477                attributes.insert("title", self.shelve(title));
1478            }
1479            if let Some(ref rel) = self.textile.rel {
1480                attributes.insert("rel", rel.clone());
1481            }
1482            let a_text = generate_tag("a", Some(&text), &attributes);
1483            let a_shelf_id = self.shelve(a_text);
1484            let result = format!("{0}{1}{2}{3}", pre, a_shelf_id, pop, tight);
1485            result
1486        };
1487
1488
1489        let mut prev_text = Cow::Borrowed(text);
1490        let mut abort = false;
1491        while !abort && prev_text.contains(&needle) {
1492            let new_text = pattern.replace_all(&prev_text, &mut f_link);
1493            if new_text == prev_text {
1494                abort = true;
1495            }
1496            prev_text = new_text.into_owned().into()
1497        }
1498        prev_text
1499    }
1500
1501    fn is_valid_url(&self, url: &str) -> bool {
1502        let uri_parts = UrlBits::parse(url);
1503        if uri_parts.scheme().is_empty() {
1504            true
1505        } else {
1506            let allowed_schemes = if self.textile.restricted {
1507                &RESTRICTED_URL_SCHEMES[..]
1508            } else {
1509                &UNRESTRICTED_URL_SCHEMES[..]
1510            };
1511            allowed_schemes.contains(&(uri_parts.scheme()))
1512        }
1513    }
1514
1515    pub fn graf<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1516        let lite = self.textile.lite;
1517        let text = Cow::Borrowed(text);
1518        let text = if !lite {self.no_textile(&text).into()} else {text};
1519        let text = if !lite {self.code(&text).into()} else {text};
1520        let text = self.get_html_comments(&text);
1521        let text = self.get_refs(&text);
1522        let ltext = self.glyph_quoted_quote(&text);
1523        let text = self.links(&ltext);
1524        let text = if !self.textile.noimage {self.image(&text)} else {text.into()};
1525        let text = if !lite {self.table(&text)} else {text};
1526        let text = if !lite {self.redcloth_list(&text)} else {text};
1527        let text = if !lite { self.textile_lists(&text)} else {text };
1528        let text = self.span(&text);
1529        let text = self.footnote_ref(&text);
1530        let text = self.note_ref(&text);
1531        let text = self.glyphs(&text);
1532        Cow::Owned(text.trim_end_matches('\n').to_owned())
1533    }
1534
1535    fn span<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1536        lazy_static! {
1537            static ref TAG_PATTERNS: [Regex; 10] = [
1538                span_re(r"\*\*"), span_re(r"\*"), span_re(r"\?\?"),
1539                span_re(r"\-"), span_re(r"__"), span_re(r"_"), span_re(r"%"),
1540                span_re(r"\+"), span_re(r"~"), span_re(r"\^")
1541            ];
1542        }
1543        self.span_depth += 1;
1544        let can_replace = self.span_depth <= self.textile.max_span_depth;
1545
1546        let f_span = |cap: &Captures| -> String {
1547            // pre, tag, atts, cite, content, end, tail = match.groups()
1548            let tag = match &cap[2] {
1549                "*" => "strong",
1550                "**" =>"b",
1551                "??" =>"cite",
1552                "_" => "em",
1553                "__" =>"i",
1554                "-" => "del",
1555                "%" => "span",
1556                "+" => "ins",
1557                "~" => "sub",
1558                "^" => "sup",
1559                _ => unreachable!("Not allowed by the regex")
1560            };
1561            let atts = &cap[3];
1562            let mut html_atts = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1563            if let Some(cite) = cap.get(4) {
1564                html_atts.insert("cite", cite.as_str().trim().to_owned());
1565            }
1566            let content = &cap[5];
1567            let content = self.span(content);
1568            let end = &cap[6];
1569            let (pre, tail) = get_special_options(
1570                unwrap_or_empty(cap.get(1)),
1571                unwrap_or_empty(cap.get(7)));
1572            let mut open_tag = String::from("<") + tag;
1573            join_html_attributes(&mut open_tag, &html_atts);
1574            open_tag.push('>');
1575            let close_tag = format!("</{}>", tag);
1576            let (open_tag_id, close_tag_id) = self.store_tags(open_tag, close_tag);
1577            String::from(pre) + &open_tag_id + &content + end + &close_tag_id + tail
1578        };
1579
1580        let mut text = Cow::Borrowed(text);
1581        if can_replace {
1582            text = Cow::Owned(multi_replace_with_one(text, TAG_PATTERNS.iter(), f_span));
1583        }
1584        self.span_depth -= 1;
1585        text
1586    }
1587
1588    fn store_tags(&mut self, open_tag: String, close_tag: String) -> (String, String) {
1589        self.ref_index += 1;
1590        self.ref_cache.insert(self.ref_index, open_tag);
1591        let open_tag_id = format!("{0}{1}:ospan ", self.textile.uid, self.ref_index);
1592
1593        self.ref_index += 1;
1594        self.ref_cache.insert(self.ref_index, close_tag);
1595        let close_tag_id = format!(" {0}{1}:cspan", self.textile.uid, self.ref_index);
1596        (open_tag_id, close_tag_id)
1597    }
1598
1599    fn retrieve_tags(&self, text: &str) -> String {
1600        let f_retrieve_tags = |cap: &Captures| -> String {
1601            let tag_id = cap[1].parse::<u32>().expect("must be an integer");
1602            self.ref_cache.get(&tag_id).cloned().unwrap_or_default()
1603        };
1604        let result = {
1605            let mut regex_cache = self.textile.regex_cache.borrow_mut();
1606            let open_tag_re: &Regex =
1607                regex_cache
1608                .entry(line!())
1609                .or_default()
1610                .entry("")
1611                .or_insert_with(
1612                    || fregex!(&format!("{0}(?P<token>[0-9]+):ospan ", self.textile.uid)));
1613            open_tag_re.replace_all(text, f_retrieve_tags)
1614        };
1615        let result = {
1616            let mut regex_cache = self.textile.regex_cache.borrow_mut();
1617            let close_tag_re: &Regex =
1618                regex_cache
1619                .entry(line!())
1620                .or_default()
1621                .entry("")
1622                .or_insert_with(
1623                    || fregex!(&format!(" {0}(?P<token>[0-9]+):cspan", self.textile.uid)));
1624            close_tag_re.replace_all(&result, f_retrieve_tags)
1625        };
1626        result.into_owned()
1627    }
1628
    /// Converts Textile block-level markup in `text` into HTML.
    ///
    /// The input is split on runs of two or more newlines (the separators
    /// are kept as chunks of their own). Every non-blank chunk is either
    /// an explicitly tagged block (`tag(atts). content`, matched by the
    /// block-tag regex) or an anonymous one. State carried between chunks:
    ///
    /// * `tag`, `atts`, `cite`: the signature of the current block; reset to
    ///   a plain paragraph for every chunk unless an extended block is open;
    /// * `ext`: non-empty while an extended block (`tag..`) is open;
    /// * `last_outer_closing`: the outer closing markup of the latest
    ///   [`Block`], appended when an extended block finally ends;
    /// * `eat`: copied from [`Block`]; when set the chunk's output is not
    ///   pushed and the whitespace that follows it is dropped;
    /// * `whitespace`: blank separators seen since the last emitted chunk.
    ///
    /// Returns the concatenation of all produced chunks.
    pub fn block<'b>(&mut self, text: &'b str) -> String {
        // Builds the regex recognizing `tag(atts). content` for a given
        // alternation of allowed block tags.
        fn textile_block_re(block_tags_pattern: &str) -> Regex {
            fregex!(
                &format!(
                    concat!(r"(?s)^(?P<tag>{0})(?P<atts>{1}{2}{1})\.(?P<ext>\.?)",
                            r"(?::(?P<cite>\S+))? (?P<graf>.*)$"),
                    block_tags_pattern, *ALIGN_RE_S, *CLS_RE_S))
        }
        lazy_static! {
            static ref TEXTILE_TAG_RE: Regex = textile_block_re(
                BLOCK_TAGS_RE_S);
            static ref TEXTILE_LIGHT_TAG_RE: Regex = textile_block_re(
                BLOCK_TAGS_LITE_RE_S);
            static ref MULTI_ENDLINE_RE: Regex = fregex!(r"(\n{2,})");
            static ref BR_TAG_RE: Regex = fregex!(r"(?i)<br\s*?/?>");
        }
        let mut out = Vec::<Cow<'b, str>>::new();
        // Lite mode recognizes a reduced set of block tags.
        let tag_pattern: &Regex = if self.textile.lite {
            &TEXTILE_LIGHT_TAG_RE
        } else {
            &TEXTILE_TAG_RE
        };
        let mut whitespace = String::new();
        let mut eat_whitespace = false;
        let mut ext = "";
        let mut tag = "";
        let mut atts = "";
        let mut cite = None;
        let mut last_outer_closing = String::new();
        let mut eat = false;
        let textblocks = split_with_capture(&MULTI_ENDLINE_RE, text);
        for block in textblocks {
            // Blank chunks (including the newline separators) are only
            // remembered, to be prepended to the next emitted chunk.
            if block.trim().is_empty() {
                if !eat_whitespace {
                    whitespace += block;
                }
                continue;
            }

            // Outside of an extended block every chunk starts out as
            // a plain paragraph.
            if ext.is_empty() {
                tag = "p";
                atts = "";
                cite = None;
                eat = false;
            }

            eat_whitespace = false;
            let mut is_anonymous_block = true;
            let block_output = if let Ok(Some(m)) = tag_pattern.captures(block) {
                is_anonymous_block = false;
                // Last block was extended, so close it
                if !ext.is_empty() {
                    if let Some(last_out) = out.last_mut() {
                        last_out.to_mut().push_str(&last_outer_closing);
                    }
                }
                // Groups: 1 = tag, 2 = atts, 3 = ext, 4 = cite, 5 = graf.
                tag = unwrap_or_empty(m.get(1));
                atts = unwrap_or_empty(m.get(2));
                ext = unwrap_or_empty(m.get(3));
                cite = m.get(4).as_ref().map(Match::as_str);
                let content = unwrap_or_empty(m.get(5));
                let bdata = Block::new(tag, atts, cite, content, self);
                eat = bdata.eat;
                last_outer_closing.replace_range(.., &bdata.outer_closing);

                // An extended block stays open: its outer closing is
                // appended later, once the block is known to have ended.
                bdata.outer_opening
                    + &bdata.inner_opening
                    + &bdata.content
                    + &bdata.inner_closing
                    + if ext.is_empty() { &bdata.outer_closing } else { "" }
            } else {
                let raw_block = DIVIDER_RE.is_match(block).unwrap_or_default();
                if !ext.is_empty() || (!block.starts_with(' ') && !raw_block) {
                    // Either a continuation of an extended block, or an
                    // ordinary untagged paragraph.
                    let bdata =  Block::new(tag, atts, cite, block, self);
                    eat = bdata.eat;
                    last_outer_closing.replace_range(.., &bdata.outer_closing);
                    // Skip outer tag because this is part of a continuing extended block
                    if bdata.content.is_empty() || (tag == "p" && !has_raw_text(&bdata.content)) {
                        bdata.content
                    } else {
                        bdata.inner_opening + &bdata.content + &bdata.inner_closing
                    }
                } else if raw_block && self.textile.restricted {
                    // Chunks matching DIVIDER_RE are shelved without inline
                    // processing; in restricted mode they are HTML-encoded first.
                    self.shelve(encode_html(block, self.textile.restricted, false))
                } else if raw_block {
                    self.shelve(block.to_owned())
                } else {
                    // The chunk starts with a space: only inline processing
                    // is applied, no block is built around it.
                    self.graf(block).into_owned()
                }
            };
            let block_output = self.do_p_br(&block_output);
            // Prepend the pending whitespace and normalize every <br> variant
            // to the form matching the configured HTML kind.
            let block_output = whitespace.clone() + &BR_TAG_RE
                .replace_all(
                    &block_output,
                    self.textile.proper_br_tag());

            if !ext.is_empty() && is_anonymous_block {
                // Continuation of an extended block: glue it to the chunk
                // that opened the block.
                if let Some(last_out) = out.last_mut() {
                    last_out.to_mut().push_str(&block_output);
                }
            } else if !eat {
                out.push(block_output.into());
            }

            if eat {
                eat_whitespace = true;
            } else {
                whitespace.clear();
            }
        }
        // The input ended while an extended block was still open.
        if !ext.is_empty() {
            if let Some(last_output) = out.last_mut() {
                *last_output += last_outer_closing.as_str();
            }
        }
        out.join("")
    }
1746
1747    fn glyph_quoted_quote<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1748        const QUOTE_STARTS: &str = "\"'({[«»‹›„‚‘”";
1749        lazy_static! {
1750            static ref PATTERN_RE: Regex = fregex!(
1751                &format!(" (?P<pre>[{}])(?P<quoted>\"?|\"[^\"]+)(?P<post>.) ",
1752                         fancy_regex::escape(QUOTE_STARTS)));
1753        }
1754
1755        fn matching_quote(quote: char) -> Option<char> {
1756            match quote {
1757                '"' => Some('"'),
1758                '\'' => Some('\''),
1759                '(' => Some(')'),
1760                '{' => Some('}'),
1761                '[' => Some(']'),
1762                '«' => Some('»'),
1763                '»' => Some('«'),
1764                '‹' => Some('›'),
1765                '›' => Some('‹'),
1766                '„' => Some('“'),
1767                '‚' => Some('‘'),
1768                '‘' => Some('’'),
1769                '”' => Some('“'),
1770                _ => None
1771            }
1772        }
1773
1774        let f_glyph_quoted_quote = |m: &Captures| -> String {
1775            // Check the correct closing character was found.
1776            let mut pre_char_buf = [0u8; 4];
1777            let mut post_char_buf = [0u8; 4];
1778            if let Some(pre_char) = m["pre"].chars().next() {
1779                if let Some(post_char) = m["post"].chars().next() {
1780                    if Some(post_char) != matching_quote(pre_char) {
1781                        return m[0].to_owned();
1782                    }
1783                    let new_pre = match pre_char {
1784                        '"' => "&#8220;",
1785                        '\'' => "&#8216;",
1786                        ' ' => "&nbsp;",
1787                        // a frugal replacement for char::to_string()
1788                        x => x.encode_utf8(&mut pre_char_buf)
1789                    };
1790                    let new_post = match post_char {
1791                        '"' => "&#8221;",
1792                        '\'' => "&#8217;",
1793                        ' ' => "&nbsp;",
1794                        x => x.encode_utf8(&mut post_char_buf)
1795                    };
1796                    let found = &m["quoted"];
1797                    let found: Cow<str> = if found.len() > 1 {
1798                        self.glyphs(found).trim_end().to_owned().into()
1799                    } else if found == "\"" {
1800                        "&quot;".into()
1801                    } else {
1802                        found.into()
1803                    };
1804                    return self.shelve(format!(" {new_pre}{found}{new_post} "))
1805                }
1806            }
1807            unreachable!("Should be reached, check regular expression");
1808        };
1809        PATTERN_RE.replace_all(text, f_glyph_quoted_quote)
1810    }
1811
1812}
1813
1814
/// Determines which flavor of HTML the [`Textile`] parser will produce.
/// Check [`Textile::set_html_kind`] for details.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HtmlKind {
    /// XHTML output; line breaks are rendered as `<br />`.
    XHTML,
    /// HTML5 output; line breaks are rendered as `<br>`.
    HTML5
}
1821
1822type AmmoniaConfigurator = dyn for <'a, 'b>
1823    Fn(&'a mut crate::ammonia::Builder<'b>) -> &'a crate::ammonia::Builder<'b>;
1824
/// The core structure responsible for converting Textile markup into HTML.
///
/// Example:
/// ```
/// use rustextile::{Textile, HtmlKind};
/// let textile = Textile::default()
///     .set_html_kind(HtmlKind::XHTML)
///     .set_restricted(true);
/// let html = textile.parse("h1. It works!");
/// assert_eq!(html, "<h1>It works!</h1>");
/// ```
pub struct Textile {
    /// Marker prefix (`"textileRef:<base id>:"`) used to build the internal
    /// placeholders; see [`Textile::set_uid`].
    uid: String,
    /// Prefix (`"<base id>-"`) for generated HTML ids and links, such as
    /// the ones of footnotes.
    pub(crate) link_prefix: String,
    /// Restricted mode flag; see [`Textile::set_restricted`].
    pub(crate) restricted: bool,
    /// Whether raw blocks are enabled; see [`Textile::set_raw_blocks`].
    pub(crate) raw_block_enabled: bool,
    /// Explicit choice made through [`Textile::set_align_class`];
    /// `None` when it was never called.
    pub(crate) align_class_enabled: Option<bool>,
    /// Whether block tags are processed; see [`Textile::set_block_tags`].
    block_tags: bool,
    /// "Lite" mode flag; see [`Textile::set_lite`].
    pub(crate) lite: bool,
    /// Set when images are NOT allowed; see [`Textile::set_images`].
    noimage: bool,
    /// Whether image dimensions should be looked up;
    /// see [`Textile::set_getting_image_size`].
    get_sizes: bool,
    /// Maximum nesting depth up to which inline spans are converted.
    max_span_depth: u32,
    /// The HTML flavor to produce; see [`Textile::set_html_kind`].
    html_type: HtmlKind,
    /// Optional `rel` attribute for links; see [`Textile::set_rel`].
    rel: Option<String>,
    /// Lazily compiled regexes, keyed by the source line which requested
    /// them and then by a static string key.
    regex_cache: std::cell::RefCell<HashMap<u32, HashMap<&'static str, Regex>>>,
    /// Glyph replacement rules whose replacement text depends on `uid`.
    dyn_glyph_replacers: [(Regex, String); 1],
    /// Sanitizer configuration callback; `None` disables the sanitation step.
    sanitizer_config: Option<Box<AmmoniaConfigurator>>,
}
1853
1854fn normalize_newlines(text: &str) -> String {
1855    lazy_static! {
1856        static ref CHANGES: [(Regex, &'static str); 2] = [
1857            (fregex!(r"\r\n?"), "\n"),
1858            (fregex!(r"(?m)^[ \t]*\n"), "\n"),
1859        ];
1860    }
1861    multi_replace(text.into(), CHANGES.iter().map(|i| (&i.0, i.1)))
1862        .trim_matches('\n')
1863        .into()
1864}
1865
/// Produces a short hexadecimal token derived from the current system time
/// (a hash of the nanoseconds elapsed since the Unix epoch).
///
/// Never panics: if the system clock is set to a moment before the epoch,
/// the distance to the epoch is hashed instead.
fn time_based_uid() -> String {
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // A clock set before 1970 is unusual but not a reason to crash.
        .unwrap_or_else(|before_epoch| before_epoch.duration())
        .as_nanos();
    let mut hasher = DefaultHasher::new();
    hasher.write_u128(nanos);
    format!("{:x}", hasher.finish())
}
1872
1873const SNIP_NAB: &str = r"\p{Ll}";
1874lazy_static! {
1875    static ref DYN_3PLUS_RE: Regex = fregex!(
1876        &format!(
1877            concat!(
1878                r#"({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)"#,
1879                r#"(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))"#),
1880            space=SNIP_SPACE,
1881            abr=SNIP_ABR,
1882            nab=SNIP_NAB,
1883            pnct=PNCT_RE_S));
1884}
1885
1886impl Default for Textile {
1887    fn default() -> Self {
1888        let result = Textile {
1889            link_prefix: String::new(), // to be filled by set_uid()
1890            uid: String::new(), // to be filled by set_uid()
1891            restricted: false,
1892            raw_block_enabled: false,
1893            align_class_enabled: None,
1894            block_tags: true,
1895            lite: false,
1896            noimage: false,
1897            get_sizes: false,
1898            max_span_depth: 5,
1899            html_type: HtmlKind::HTML5,
1900            rel: None,
1901            sanitizer_config: None,
1902            regex_cache: std::cell::RefCell::new(Default::default()),
1903            dyn_glyph_replacers: [
1904                // 3+ uppercase
1905                // will be properly filled later by set_uid
1906                (DYN_3PLUS_RE.clone(), String::new()),
1907            ]
1908        };
1909        result.set_uid(&time_based_uid())
1910    }
1911}
1912
1913impl Textile {
1914
1915    /// Given a Textile-formatted text, converts it into HTML (or XHTML,
1916    /// if [`set_html_kind`](Textile::set_html_kind)[`(HtmlKind::XHTML)`](HtmlKind::XHTML)
1917    /// was called previously).
1918    pub fn parse(&self, text: &str) -> String {
1919
1920        if text.trim().is_empty() {
1921            return text.to_owned();
1922        }
1923
1924        let text = if self.restricted {
1925            Cow::Owned(encode_html(text, false, false))
1926        } else {
1927            Cow::Borrowed(text)
1928        };
1929
1930        let mut state = ParserState::new(self);
1931        let text = normalize_newlines(&text)
1932            .replace(&state.textile.uid, "");
1933
1934        let text = if self.block_tags {
1935            let text = state.block(&text);
1936            state.place_note_lists(&text).into_owned()
1937        } else {
1938            let text = text + "\n\n";
1939            // Treat quoted quote as a special glyph.
1940            let text = state.glyph_quoted_quote(&text);
1941            // Inline markup (em, strong, sup, sub, del etc).
1942            let text = state.span(&text);
1943            // Glyph level substitutions (mainly typographic -- " & ' => curly
1944            // quotes, -- => em-dash etc.
1945            state.glyphs(&text).into_owned()
1946        };
1947
1948        let text = state.retrieve(text);
1949        let text = text.replace(
1950            &format!("{0}:glyph:", &state.textile.uid),
1951            "");
1952
1953        let text = state.retrieve_tags(&text);
1954        let text = state.retrieve_urls(&text);
1955
1956        let text = match self.sanitizer_config {
1957            Some(ref configurator) =>
1958                configurator(
1959                    crate::ammonia::Builder::default().link_rel(None)
1960                )
1961                .clean(&text)
1962                .to_string()
1963                .into(),
1964            None => text,
1965        };
1966
1967        // if the text contains a break tag (<br> or <br />) not followed by
1968        // a newline, replace it with a new style break tag and a newline.
1969        lazy_static! {
1970            static ref BR_PATTERN: Regex = fregex!(r"<br( /)?>(?!\n)");
1971        }
1972
1973        let text = BR_PATTERN.replace_all(
1974            &text,
1975            match self.html_type {
1976                HtmlKind::XHTML => "<br />\n",
1977                HtmlKind::HTML5 => "<br>\n",
1978            });
1979
1980        let text = text.trim_end_matches('\n');
1981
1982        text.to_string()
1983    }
1984
1985    /// Enables automatic addition of `width` and `height` attributes
1986    /// to `<img .. />` image tags, based on their actual dimensions.
1987    /// This requires sending one HTTP requests per image to determine the size
1988    /// of each, though each request will fetch only a small chunk of the image
1989    /// (1 KiB) enough for determening its size.
1990    pub fn set_getting_image_size(mut self, value: bool) -> Self {
1991        self.get_sizes = value;
1992        self
1993    }
1994
1995    /// Whether Textile block tags (such as `bc.`) should be parsed
1996    /// and processed. Enabled by default.
1997    pub fn set_block_tags(mut self, value: bool) -> Self {
1998        self.block_tags = value;
1999        self
2000    }
2001
2002    /// Which flavor of HTML the parser should output: either XHTML or HTML5.
2003    /// This affects whether `<acronim>` or `<abbr>` will be used,
2004    /// `<br>` or `<br />` and so on.
2005    ///
2006    /// See also [`Textile::set_align_class`] for details on how the images
2007    /// will be handled in each case.
2008    pub fn set_html_kind(mut self, html_type: HtmlKind) -> Self {
2009        self.html_type = html_type;
2010        self
2011    }
2012
2013    /// Controls the restricted mode, which (when enabled) forces the parser to
2014    ///
2015    /// * escape any raw HTML
2016    /// * ignores any potentially unsafe Textile attributes within the document
2017    ///   (the ones that force a particular "style", "class" or "id" within the HTML).
2018    /// * allows only certain URL schemes ("http", "https", "ftp", "mailto").
2019    ///
2020    /// Check also [`Textile::set_lite`], [`Textile::set_images`] and [`Textile::set_sanitize`],
2021    /// which provide alternative kinds of restrictions.
2022    pub fn set_restricted(mut self, value: bool) -> Self {
2023        self.restricted = value;
2024        self
2025    }
2026
2027    /// Enables the "lite mode", which limits the set of allowed Textile
2028    /// blocks to paragraphs and blockquotes only.
2029    ///
2030    /// Check also [`Textile::set_images`], [`Textile::set_restricted`]
2031    /// or [`Textile::set_sanitize`] if you need to put more restrictions
2032    /// on how the parser handles its input.
2033    pub fn set_lite(mut self, value: bool) -> Self {
2034        self.lite = value;
2035        self
2036    }
2037
2038    /// Controls whether images are allowed in the input.
2039    pub fn set_images(mut self, value: bool) -> Self {
2040        self.noimage = !value;
2041        self
2042    }
2043
2044    /// Forces a certain "rel" property on all links processed by the parser.
2045    /// As an example, you can set it to `"nofollow"` to prevent search engines
2046    /// from scanning them.
2047    pub fn set_rel<S>(mut self, value: Option<S>) -> Self where S: AsRef<str> {
2048        self.rel = value.map(|v| v.as_ref().to_owned());
2049        self
2050    }
2051
2052    /// Controls whether images must be aligned by using the `align`
2053    /// HTML5 attribute (which became deprecated in HTML5) or by adding
2054    /// an `"align-{left|rignt|center}"` class to the `<img>` tag.
2055    ///
2056    /// If not set, for XHTML the "align" attribute is going to be used,
2057    /// and for HTML5 the `align-...` class will be added instead.
2058    pub fn set_align_class(mut self, value: bool) {
2059        self.align_class_enabled = Some(value);
2060    }
2061
2062    /// Enables and disables raw blocks.
2063    ///
2064    /// When raw blocks are enabled, any paragraph blocks wrapped in a tag
2065    /// not matching HTML block or phrasing tags will not
2066    /// be parsed, and instead is left as is.
2067    pub fn set_raw_blocks(mut self, value: bool) -> Self {
2068        self.raw_block_enabled = value;
2069        self
2070    }
2071
2072    /// Controls the final extra HTML sanitation step, which is done by
2073    /// the [Ammonia](crate::ammonia) library. A quote
2074    /// from the Ammonia's documentation:
2075    ///
2076    /// > "Ammonia is designed to prevent cross-site scripting, layout breaking,
2077    /// > and clickjacking caused by untrusted user-provided HTML being mixed
2078    /// > into a larger web page"
2079    ///
2080    /// This sanitation will use the default Ammonia's settings. You can adjust
2081    /// them by calling [`Textile::adjust_sanitizer`] method.
2082    /// Also check [`Textile::set_images`], [`Textile::set_lite`]
2083    /// and [`Textile::set_restricted`] to learn about other safety measures.
2084    pub fn set_sanitize(mut self, enable: bool) -> Self {
2085        if enable {
2086            self.adjust_sanitizer(|sanitizer| sanitizer)
2087        } else {
2088            self.sanitizer_config = None;
2089            self
2090        }
2091    }
2092
2093    /// Just like [`Textile::set_sanitize`] this method enables additional
2094    /// sanitation of the output HTML through the
2095    /// [Ammonia](crate::ammonia) library. But you can
2096    /// also configure the sanitizer yourself.
2097    ///
2098    /// Example:
2099    ///
2100    /// ```rust
2101    /// use rustextile::Textile;
2102    /// let parser = Textile::default()
2103    ///     .adjust_sanitizer(|sanitizer| sanitizer.link_rel(Some("noopener")));
2104    /// let html = parser.parse(r#""a link":https://example.com"#);
2105    /// assert_eq!(html, r#"<p><a href="https://example.com/" rel="noopener">a link</a></p>"#);
2106    /// ```
2107    pub fn adjust_sanitizer<F>(mut self, configurator: F) -> Self
2108        where for <'a, 'b> F: Fn(&'a mut crate::ammonia::Builder<'b>) -> &'a crate::ammonia::Builder<'b> + 'a
2109    {
2110        self.sanitizer_config = Some(Box::new(configurator));
2111        self
2112    }
2113
2114    /// Allows to control a small random token which is used by the parser
2115    /// internally to construct unique HTML id attributes and links necessary
2116    /// for footnotes.
2117    ///
2118    /// Normally you don't need to use this method. Its intended purpose
2119    /// is to guarantee stable outputs in automated tests.
2120    pub fn set_uid(mut self, base_id: &str) -> Self {
2121        self.uid = format!("textileRef:{0}:", base_id);
2122        self.link_prefix = format!("{0}-", base_id);
2123        let dyn_3plus_replacement = format!(
2124            r#"$1<span class="caps">{0}:glyph:$2</span>$3"#,
2125            &self.uid);
2126        self.dyn_glyph_replacers = [
2127            // 3+ uppercase
2128            (DYN_3PLUS_RE.clone(), dyn_3plus_replacement),
2129        ];
2130        self
2131    }
2132
2133    pub(crate) fn proper_br_tag(&self) -> &'static str {
2134        match self.html_type {
2135            HtmlKind::XHTML => "<br />",
2136            HtmlKind::HTML5 => "<br>",
2137        }
2138    }
2139}
2140
#[cfg(test)]
mod test {
    use super::{get_image_size, ParserState, Textile};

    /// Checks the image size detection, both for an image which exists
    /// (this performs a real network request) and for a URL which cannot
    /// be fetched.
    #[test]
    fn test_get_image_size() {
        // Getting a real image
        let favicon_size = get_image_size("https://en.wikipedia.org/favicon.ico");
        assert_ne!(favicon_size, None);
        if let Some((width, height)) = favicon_size {
            assert!(width > 0);
            assert!(height > 0);
        }

        // Getting an impossible image
        let missing_size = get_image_size("../picture.jpg");
        assert_eq!(missing_size, None);
    }

    /// A footnote reference must turn into a superscript link whose ids
    /// carry the parser's link prefix.
    #[test]
    fn test_footnote_ref() {
        let textile = Textile::default();
        let mut state = ParserState::new(&textile);
        let actual = state.footnote_ref("foo[1]");
        let expected = format!(
            "foo<sup class=\"footnote\" id=\"fnrev{0}1\"><a href=\"#fn{0}1\">1</a></sup>",
            textile.link_prefix);
        assert_eq!(actual, expected);
    }
}
rustextile/parser.rs

rustextile/
parser.rs