1use std::collections::{BTreeMap, HashMap};
2use std::borrow::Cow;
3use std::io::Read;
4use std::time::{SystemTime, UNIX_EPOCH};
5use std::collections::hash_map::DefaultHasher;
6use std::hash::Hasher;
7
8use indexmap::IndexMap;
9use lazy_static::lazy_static;
10use fancy_regex::{Regex, Captures, Replacer, Match};
11
12use crate::charcounter::CharCounter;
13use crate::regextra::{split_with_capture, fregex, multi_replace, multi_replace_with_one, unwrap_or_empty};
14use crate::htmltools::{generate_tag, encode_html, join_html_attributes, unescape, has_raw_text, reverse_encode_html};
15use crate::table::{process_table, TABLE_SPAN_RE_S};
16use crate::urlutils::{UrlBits, UrlString};
17use crate::block::{Block, BlockAttributes, BlockHtmlAttributes};
18use crate::regex_snips::{
19 CLS_RE_S, ALIGN_RE_S, SNIP_ACR, SNIP_ABR, SNIP_SPACE, SNIP_DIGIT,
20 SNIP_WRD, SNIP_CUR, SNIP_CHAR, LONE_AMP_RE, PNCT_RE_S, DIVIDER_RE};
21
/// Symbol characters accepted as a note-list start character. They are spliced
/// into the `notelist` character class, and a backlink that starts from one of
/// them is repeated rather than incremented for each reference.
const SYMS_RE_S: &str = "¤§µ¶†‡•∗∴◊♠♣♥♦";
/// Regex alternation of block signatures.
/// NOTE(review): not referenced in the visible part of the file — presumably
/// consumed by the block parser; confirm.
const BLOCK_TAGS_RE_S: &str = r"bq|bc|notextile|pre|h[1-6]|fn\d+|p|###";
/// Shorter alternation of block signatures.
/// NOTE(review): presumably the "lite" counterpart of `BLOCK_TAGS_RE_S`; confirm.
const BLOCK_TAGS_LITE_RE_S: &str = "bq|bc|p";
/// URL schemes recognised in `[name]url` reference definitions when the
/// parser runs in restricted mode.
const RESTRICTED_URL_SCHEMES: [&str; 4] = ["http", "https", "ftp", "mailto"];
/// URL schemes recognised in `[name]url` reference definitions when the
/// parser is not restricted.
const UNRESTRICTED_URL_SCHEMES: [&str; 9] = ["http", "https", "ftp", "mailto", "file", "tel", "callto", "sftp", "data"];
28
29fn span_re(tag: &str) -> Regex {
30 const PNCT: &str = r#".,"'?!;:‹›«»„“”‚‘’"#;
31 fregex!(
32 &format!(
33 concat!(
34 r"(?P<pre>^|(?<=[\s>{pnct}\(])|[{{\[])",
35 r"(?P<tag>{tag})(?!{tag})",
36 r"(?P<atts>{cls})",
37 r"(?!{tag})",
38 r"(?::(?P<cite>\S+[^{tag}]{space}))?",
39 r"(?P<content>[^{space}{tag}]+|\S.*?[^\s{tag}\n])",
40 r"(?P<end>[{pnct}]*)",
41 r"{tag}",
42 r"(?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))"),
43 tag=tag, cls=*CLS_RE_S, pnct=PNCT, space=SNIP_SPACE))
44}
45
46fn do_special<'t, R>(text: &'t str, start: &str, end: &str, method: R) -> Cow<'t, str>
47 where R: Replacer
48{
49 let pattern = Regex::new(
50 &format!(r"(?ms)(^|\s|[\[({{>|]){0}(.*?){1}($|[\])}}])?",
51 fancy_regex::escape(start),
52 fancy_regex::escape(end)))
53 .expect("A valid expression");
54
55 pattern.replace_all(text, method)
56}
57
58fn get_image_size(url: &str) -> Option<(i64, i64)> {
59 const MAX_IMAGE_CHUNK: usize = 1024;
60 let mut buffer = [0u8; MAX_IMAGE_CHUNK];
61 if let Ok(mut response) = reqwest::blocking::get(url) {
62 let mut read_total: usize = 0;
63 loop {
64 let read_result = response.read(&mut buffer[read_total..]);
65 match read_result {
66 Ok(bytes_fetched) => {
67 if bytes_fetched == 0 { break; }
68 read_total += bytes_fetched;
69 if let Ok(info) = imageinfo::ImageInfo::from_raw_data(&buffer[..read_total]) {
70 return Some((info.size.width, info.size.height));
71 }
72 },
73 Err(_) => {
74 return None;
75 },
76 }
77 }
78 }
79 None
80}
81
82fn make_glyph_replacers(is_html5: bool) -> [(Regex, &'static str); 22] {
83 lazy_static! {
84 static ref CUR: String = format!(
85 r"(?:[{0}]{1}*)?", SNIP_CUR, SNIP_SPACE);
86 }
87 [
88 (fregex!(
90 &format!(
91 concat!(r#"(?i)(?<=\b|x)([0-9]+[\])]?['"]? ?)[x]( ?[\[(]?)"#,
92 r"(?=[+-]?{0}[0-9]*\.?[0-9]+)"),
93 *CUR)),
94 r"$1×$2"),
95 (fregex!(&format!(r"({0}|\))'({0})", SNIP_WRD)),
97 r"$1’$2"),
98 (fregex!(&format!(r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')", SNIP_SPACE, SNIP_WRD)),
100 r"$1’$2"),
101 (fregex!(r"([(\[{])'(?=\S)"), r"$1‘"),
103 (fregex!(&format!(r"(\S)'(?={0}|{1}|<|$)", SNIP_SPACE, PNCT_RE_S)),
105 r"$1’"),
106 (fregex!(r"'"), r"‘"),
108 (fregex!(r#"([(\[{])"(?=\S)"#), r"$1“"),
111 (fregex!(&format!(r#"(\S)"(?={0}|{1}|<|$)"#, SNIP_SPACE, PNCT_RE_S)),
113 r"$1”"),
114 (fregex!(r#"""#), r"“"),
116 (fregex!(r"([^.]?)\.{3}"), r"$1…"),
118 (fregex!(r"(\s?)&(\s)"), r"$1&$2"),
120 (fregex!(r"(\s?)--(\s?)"), r"$1—$2"),
122 (fregex!(r" - "), r" – "),
124 (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]TM[\])]", SNIP_SPACE)),
126 r"$1™"),
127 (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]R[\])]", SNIP_SPACE)),
129 r"$1®"),
130 (fregex!(&format!(r"(?i)(\b ?|{0}|^)[(\[]C[\])]", SNIP_SPACE)),
132 r"$1©"),
133 (fregex!(r"[(\[]1\/2[\])]"), r"½"),
135 (fregex!(r"[(\[]1\/4[\])]"), r"¼"),
137 (fregex!(r"[(\[]3\/4[\])]"), r"¾"),
139 (fregex!(r"[(\[]o[\])]"), r"°"),
141 (fregex!(r"[(\[]\+\/-[\])]"), r"±"),
143 (fregex!(&format!(r"\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])", SNIP_ABR, SNIP_ACR)),
145 if is_html5 {r#"<abbr title="$2">$1</abbr>"#} else {r#"<acronym title="$2">$1</acronym>"#}),
146 ]
147}
148
/// Everything the parser knows about one auto-numbered note: its definition
/// (if one was seen) and the references that point at it.
#[derive(Clone, Debug)]
pub(crate) struct NoteInfo {
    /// Anchor id of the note (link prefix followed by a running index);
    /// empty until an id has been assigned.
    pub id: String,
    /// Rendered note body; `None` while no definition has been parsed.
    pub content: Option<String>,
    /// Per-note backlink mode taken from the definition (compared against
    /// `"!"` and `"^"` when backlinks are built); `None` falls back to the
    /// mode given on the note list.
    pub link: Option<String>,
    /// Pre-rendered HTML attribute string for the note's list item.
    pub attrs: Option<String>,
    /// Sequence number assigned on first reference; `None` for a note that
    /// was never referenced.
    pub seq: Option<String>,
    /// Anchor ids of every reference to this note, in order of appearance.
    pub refids: Vec<String>,
}
159
160
/// Drops the delimiters surrounding a special span when they form a known
/// wrapper pair (currently only `[` … `]`).
///
/// Returns `("", "")` for a wrapper pair, otherwise `pre` and `tail`
/// unchanged.
fn get_special_options<'a,'b>(pre: &'a str, tail: &'b str) -> (&'a str, &'b str) {
    const SPAN_WRAPPERS: [(&str, &str); 1] = [
        ("[", "]"),
    ];
    let is_wrapper_pair = SPAN_WRAPPERS
        .iter()
        .any(|&(opening, closing)| pre == opening && tail == closing);
    if is_wrapper_pair {
        ("", "")
    } else {
        (pre, tail)
    }
}
172
/// Strips the scheme from `url` for display purposes.
///
/// Everything up to and including the first `://` is removed. If there is no
/// `://`, everything up to and including the first `:` is removed instead.
/// A string containing neither is returned unchanged.
fn make_url_readable(url: &str) -> &str {
    ["://", ":"]
        .iter()
        .find_map(|&separator| {
            url.find(separator)
                .map(|at| &url[at + separator.len()..])
        })
        .unwrap_or(url)
}
181
/// Mutable state accumulated while one document is converted.
pub(crate) struct ParserState<'t> {
    /// Auto-numbered notes. Keyed by label while parsing; re-keyed by
    /// sequence number when the note lists are placed.
    pub notes: BTreeMap<String, NoteInfo>,
    /// Footnote number as written in the text -> generated footnote id.
    pub footnotes: IndexMap<String, String>,
    /// Placeholder token -> shelved text, substituted back by `retrieve`.
    shelf: IndexMap<String, String>,
    /// Named URL references captured from `[name]url` definitions.
    urlrefs: IndexMap<String, UrlString<'t>>,
    /// Next sequence number to hand out to a newly referenced note.
    note_index: u32,
    /// Running counter appended to the link prefix to build unique ids.
    link_index: u32,
    /// Running counter shared by the `:shelve` and `:url` placeholder tokens.
    ref_index: u32,
    /// NOTE(review): not touched in the visible part of the file — presumably
    /// the nesting depth of inline spans; confirm.
    span_depth: u32,
    /// `:url` token number -> escaped URL text.
    ref_cache: IndexMap<u32, String>,
    /// The converter configuration this state belongs to.
    pub textile: &'t Textile,
    /// Next start value of ordered lists, keyed by list marker.
    ol_starts: IndexMap<String, usize>,
    /// Notes that were defined but never referenced.
    unreferenced_notes: BTreeMap<String, NoteInfo>,
    /// Rendered note-list items keyed by backlink mode, extras flag and
    /// start character.
    notelist_cache: IndexMap<String, String>,
}
197
198
199impl <'t> ParserState<'t> {
200 fn new(textile: &'t Textile) -> Self {
201 Self {
202 textile,
203 notes: Default::default(),
204 footnotes: Default::default(),
205 shelf: Default::default(),
206 urlrefs: Default::default(),
207 note_index: 1,
208 link_index: 0,
209 ref_index: 0,
210 span_depth: 0,
211 ol_starts: Default::default(),
212 ref_cache: Default::default(),
213 notelist_cache: Default::default(),
214 unreferenced_notes: Default::default(),
215 }
216 }
217
218 pub fn increment_link_index(&mut self) -> u32 {
219 self.link_index += 1;
220 self.link_index
221 }
222
223 pub fn parse_note_defs(&mut self, m: &Captures) -> &'static str {
225 let label = &m["label"];
226 let link = &m["link"];
227 let att = &m["att"];
228 let content = &m["content"];
229
230 if !self.notes.contains_key(label) {
232 let new_index = self.increment_link_index();
233 self.notes.insert(
234 label.to_owned(),
235 NoteInfo {
236 id: format!(
237 "{0}{1}",
238 self.textile.link_prefix,
239 new_index),
240 content: None,
241 link: None,
242 attrs: None,
243 seq: None,
244 refids: Default::default(),
245 });
246
247 }
248 if self.notes.contains_key(label) {
250 let note_content = self.graf(content).into_owned();
251 if let Some(mut note) = self.notes.get_mut(label) {
252 if note.link.is_none() {
253 note.link = if link.is_empty() { None } else { Some(link.into()) };
254 note.attrs = Some(
255 BlockAttributes
256 ::parse(att, None, true, self.textile.restricted)
257 .into());
258 note.content = Some(note_content);
259 }
260 }
261 }
262
263 ""
264 }
265 fn make_back_ref_link(info: &NoteInfo, g_links: &str, i: char) -> Cow<'t, str> {
267 fn char_code_to_entity(c: u32) -> String {
268 let entity = format!("&#{};", c);
269 unescape(&entity).into_owned()
270 }
271
272 let backlink_type = match info.link {
273 Some(ref link) => link.as_str(),
274 None => g_links,
275 };
276 let allow_inc = !SYMS_RE_S.contains(i);
277 let mut i_ = i as u32;
278
279 match backlink_type {
280 "!" => Cow::Borrowed(""),
281 "^" => {
282 if !info.refids.is_empty() {
283 Cow::Owned(format!("<sup><a href=\"#noteref{0}\">{1}</a></sup>",
284 info.refids[0], char_code_to_entity(i_)))
285 } else {
286 Cow::Borrowed("")
287 }
288 },
289 _ => {
290 let mut result = String::new();
291 for refid in info.refids.iter() {
292 let sup = format!(
293 "<sup><a href=\"#noteref{0}\">{1}</a></sup>",
294 refid, char_code_to_entity(i_));
295 if allow_inc {
296 i_ += 1;
297 }
298 if !result.is_empty() {
299 result.push(' ');
300 }
301 result.push_str(&sup);
302 }
303 Cow::Owned(result)
304 }
305 }
306 }
307
308 fn place_note_lists<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
310 if !self.notes.is_empty() {
311 let mut o = BTreeMap::<String, NoteInfo>::new();
312 for (label, info) in self.notes.iter() {
313 let mut info_clone = info.clone();
314 if let Some(ref i) = info.seq {
315 info_clone.seq = Some(label.clone());
316 o.insert(i.clone(), info_clone);
317 } else {
318 self.unreferenced_notes.insert(label.clone(), info_clone);
319 }
320 }
321 self.notes = o;
322 }
323 lazy_static! {
324 static ref TEXT_RE: Regex = fregex!(
325 &format!(
326 r"<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)\.?[\s]*</p>",
327 *CLS_RE_S, SYMS_RE_S));
328 }
329 let f_note_lists = |cap: &Captures| -> String {
331 let (att, g_links, extras) = (&cap[1], &cap[3], &cap[4]);
332
333 let start_char = match cap.get(2) {
334 Some(m) => m.as_str().chars().next().expect("Not empty"),
335 None => 'a'
336 };
337 let index = format!("{0}{1}{2}", g_links, extras, start_char);
338 let mut result = String::new();
339
340 if !self.notelist_cache.contains_key(&index) {
341 let mut o = Vec::<String>::new();
342 if !self.notes.is_empty() {
343 for (_seq, info) in self.notes.iter() {
344 let links = Self::make_back_ref_link(info, g_links, start_char);
345 let li = if let NoteInfo {
346 id: ref infoid,
347 attrs: Some(ref atts),
348 content: Some(ref content),
349 ..
350 } = *info {
351 format!("\t\t<li{0}>{1}<span id=\"note{2}\"> </span>{3}</li>",
352 atts, links, infoid, content)
353 } else {
354 format!("\t\t<li>{0} Undefined Note [#{1}].</li>",
355 links, info.seq.as_deref().unwrap_or_default())
356 };
357 o.push(li);
358 }
359 }
360 if extras == "+" && !self.unreferenced_notes.is_empty() {
361 for info in self.unreferenced_notes.values() {
362 let atts = info.attrs.as_deref().unwrap_or_default();
363 let content = info.content.as_deref().unwrap_or_default();
364 o.push(format!("\t\t<li{0}>{1}</li>", atts, content));
365 }
366 }
367 result = o.join("\n");
368 self.notelist_cache.insert(index, result.clone());
369 }
370 if result.is_empty() {
371 result
372 } else {
373 let list_atts: String = BlockAttributes
374 ::parse(att, None, true, self.textile.restricted)
375 .into();
376 format!("<ol{0}>\n{1}\n\t</ol>", list_atts, result)
377 }
378 };
379 TEXT_RE.replace_all(text, f_note_lists)
380 }
381
382 pub fn shelve(&mut self, text: String) -> String {
383 self.ref_index += 1;
384 let item_id = format!("{0}{1}:shelve", self.textile.uid, self.ref_index);
385 self.shelf.insert(item_id.clone(), text);
386 item_id
387 }
388
389 pub fn shelve_url(&mut self, text: UrlString) -> String {
390 let escaped_url = text.to_html_string();
391 self.ref_index += 1;
392 self.ref_cache.insert(self.ref_index, escaped_url);
393 format!("{0}{1}{2}", self.textile.uid, self.ref_index, ":url")
394 }
395
396 pub fn retrieve(&self, text: String) -> String {
397 let mut new_text = text;
398 loop {
399 let old = new_text.clone();
400 for (k, v) in self.shelf.iter() {
401 new_text = new_text.replace(k, v);
402 }
403 if new_text == old {
404 break;
405 }
406 }
407 new_text
408 }
409
410 fn retrieve_urls<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
411 let mut regex_cache = self.textile.regex_cache.borrow_mut();
412 let pattern = regex_cache
413 .entry(line!())
414 .or_default()
415 .entry("")
416 .or_insert_with(
417 || fregex!(&format!(r"{0}(?P<token>[0-9]+):url", self.textile.uid)));
418
419 let retrieve_url = |cap: &Captures| -> String {
420 let token = &cap["token"];
421 match token.parse::<u32>() {
422 Ok(key) => {
423 let url = self.ref_cache.get(&key).cloned().unwrap_or_default();
424 if url.is_empty() {
425 url
426 } else if let Some(rurl) = self.urlrefs.get(&url) {
427 rurl.to_html_string()
428 } else {
429 url
430 }
431 },
432 Err(_) => {
433 String::new()
434 },
435 }
436 };
437 pattern.replace_all(text, retrieve_url)
438 }
439
440 fn f_textile(&mut self, cap: &Captures) -> String {
441 let (before, notextile) = (&cap[1], &cap[2]);
442 let after = unwrap_or_empty(cap.get(3));
443 let (before, after) = get_special_options(before, after);
444 String::from(before) + &self.shelve(notextile.to_owned()) + after
445 }
446
447 pub fn no_textile(&mut self, text: &str) -> String {
448 let step1 = do_special(text, "<notextile>", "</notextile>", |cap: &Captures| {Self::f_textile(self, cap)});
449 let step2 = do_special(&step1, "==", "==", |cap: &Captures| {Self::f_textile(self, cap)});
450 step2.into_owned()
451 }
452
453 pub fn code(&mut self, text: &str) -> String {
454 fn f_code(parser: &mut ParserState, cap: &Captures) -> String {
455 let (before, text) = (&cap[1], &cap[2]);
456 let after = unwrap_or_empty(cap.get(3));
457 let (before, after) = get_special_options(before, after);
458 let text = encode_html(text, false, false);
459 String::from(before) + &parser.shelve(format!("<code>{0}</code>", text)) + after
460 }
461
462 fn f_pre(parser: &mut ParserState, cap: &Captures) -> String {
463 let (before, text) = (&cap[1], &cap[2]);
464 let after = unwrap_or_empty(cap.get(3));
465 let (before, after) = get_special_options(before, after);
466 let text = encode_html(text, true, false);
468 String::from(before) + "<pre>" + &parser.shelve(text) + "</pre>" + after
469 }
470
471 let text = do_special(text, "<code>", "</code>", |cap: &Captures| f_code(self, cap));
472 let text = do_special(&text, "@", "@", |cap: &Captures| f_code(self, cap));
473 do_special(&text, "<pre>", "</pre>", |cap: &Captures| f_pre(self, cap)).into_owned()
474 }
475
476 fn get_html_comments<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
477 do_special(text, "<!--", "-->", |cap: &Captures| -> String {
479 let (before, comment_text) = (&cap[1], &cap[2]);
487 format!("{0}<!--{1}-->", before, self.shelve(comment_text.to_owned()))
488 })
489 }
490
491 pub(crate) fn unrestrict_url<'u>(&self, url: &'u str) -> Cow<'u, str> {
496 if self.textile.restricted {
497 reverse_encode_html(url)
498 } else {
499 url.into()
500 }
501 }
502
503 fn get_refs<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
505 fn make_url_ref_re(schemes: &[&str]) -> Regex {
506 fregex!(
507 &format!(
508 r"(?:(?<=^)|(?<=\s))\[(.+)\]((?:{0}:\/\/|\/)\S+)(?=\s|$)",
509 schemes.join("|")))
510 }
511 lazy_static! {
512 static ref RESTRICTED_URLREF_RE: Regex = make_url_ref_re(&RESTRICTED_URL_SCHEMES[..]);
513 static ref UNRESTRICTED_URLREF_RE: Regex = make_url_ref_re(&UNRESTRICTED_URL_SCHEMES[..]);
514 }
515 let urlref_re: &Regex = if self.textile.restricted {
516 &RESTRICTED_URLREF_RE
517 } else {
518 &UNRESTRICTED_URLREF_RE
519 };
520 urlref_re.replace_all(text, |cap: &Captures| -> &str {
521 let flag = &cap[1];
522 let url = self.unrestrict_url(&cap[2]).into_owned();
523 self.urlrefs.insert(
524 flag.to_string(),
525 url.into());
526 ""
527 })
528 }
529
530
531 fn image<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
532 lazy_static! {
533 static ref PATTERN: Regex = fregex!(
534 &format!(
535 concat!(
536 r"(?:[\[{{])?", r"\!", r"([<>=]|<|>)?", r"({0})", r"(?:\.\s)?", r"([^\s(!]+)", r"\s?", r"(?:\(([^\)]+)\))?", r"\!", r"(?::(\S+)(?<![\]).,]))?", r"(?:[\]}}]|(?=[.,\s)|]|$))", ),
548 *CLS_RE_S));
549 }
550 let f_image = |cap: &Captures| -> String {
551 let url = &cap[3];
552 if !self.is_valid_url(url) {
553 return cap[0].to_owned();
554 }
555 let mut atts = if let Some(attributes) = cap.get(2) {
556 BlockAttributes::parse(attributes.as_str(), None, true, self.textile.restricted).html_attrs()
557 } else {
558 BlockHtmlAttributes::default()
559 };
560
561
562 if let Some(align) = cap.get(1) {
563 let alignment = match align.as_str() {
564 "<" | "<" => "left",
565 "=" => "center",
566 ">" | ">" => "right",
567 _ => unreachable!("Not allowed by regex")
568 };
569 let use_align_class = match self.textile.align_class_enabled {
570 Some(v) => v,
571 None => match self.textile.html_type {
572 HtmlKind::XHTML => false,
573 HtmlKind::HTML5 => true,
574 }
575 };
576 if use_align_class {
577 atts.insert_css_class(format!("align-{}", alignment));
578 } else {
579 atts.insert("align", alignment.to_owned());
580 }
581 }
582
583 let optional_title = cap.get(4).map(|m| m.as_str());
584 atts.insert("alt", optional_title.unwrap_or_default().to_owned());
585
586 if !UrlBits::parse(url).is_relative() && self.textile.get_sizes {
587 if let Some((width, height)) = get_image_size(url) {
588 atts.insert("height", height.to_string());
589 atts.insert("width", width.to_string());
590 }
591 };
592 let url_id = self.shelve_url(
593 self.unrestrict_url(url).into());
594 atts.insert("src", url_id);
595
596 if let Some(title) = optional_title {
597 atts.insert("title", title.to_owned());
598 }
599
600 let img = generate_tag("img", None, &atts);
601 let out = if let Some(href) = cap.get(5) {
602 let shelved_href = self.shelve_url(
603 self.unrestrict_url(href.as_str()).into());
604 if !shelved_href.is_empty() {
605 generate_tag(
606 "a",
607 Some(&img),
608 &[("href".into(), shelved_href)])
609 } else {
610 img
611 }
612 } else {
613 img
614 };
615 self.shelve(out)
616 };
617 PATTERN.replace_all(text, f_image)
618 }
619
620
621 fn links(&mut self, text: &str) -> String {
622 let marked_text = self.mark_start_of_links(text);
623 let result = self.replace_links(&marked_text).into_owned();
624 result
625 }
626
627 fn mark_start_of_links(&self, text: &str) -> String {
633 lazy_static! {
634 static ref SLICE_RE: Regex = fregex!(
635 &format!("\":(?={})", SNIP_CHAR));
636 }
637
638 let mut slices: Vec<_> = split_with_capture(&SLICE_RE, text).collect();
639
640 if slices.len() <= 1 {
641 return text.into();
642 }
643 let mut output: Vec<Cow<str>> = Vec::new();
644
645 let last_slice = slices.pop().expect("Verified, not empty");
646 lazy_static! {
647 static ref START_NOSPACE_RE: Regex = fregex!(r"^\S|=$");
648 static ref END_NOSPACE_RE: Regex = fregex!(r"\S$");
649 }
650 for s in slices {
651 if !s.contains('"') {
654 output.push(Cow::Borrowed(s));
655 continue;
656 }
657 let mut possible_start_quotes: Vec<_> = s.split('"').collect();
661
662 let mut possibility = possible_start_quotes
665 .pop()
666 .expect("checked above, at least one value must be present");
667
668 let mut balanced = 0;
672 let mut linkparts = Vec::<&str>::new();
673 let mut i = 0;
674
675 while balanced != 0 || i == 0 {
676 linkparts.push(possibility);
681
682 if !possibility.is_empty() {
683 if START_NOSPACE_RE.find(possibility).unwrap_or(None).is_some() {
684 balanced -= 1;
685 }
686 if END_NOSPACE_RE.find(possibility).unwrap_or(None).is_some() {
687 balanced += 1;
688 }
689 if let Some(p) = possible_start_quotes.pop() {
690 possibility = p;
691 }
692 } else {
693 balanced += if i == 0 { 1 } else { - 1 };
699 i += 1;
700 if let Some(p) = possible_start_quotes.pop() {
701 possibility = p;
702 } else {
703 linkparts.pop();
706 break;
707 }
708 if possibility.is_empty() || possibility.ends_with(' ') {
711 balanced = 0;
713 }
714 }
715
716 if balanced <= 0 {
717 possible_start_quotes.push(possibility);
718 break;
719 }
720 }
721
722 linkparts.reverse();
725 let link_content = linkparts.join("\"");
726 let pre_link = possible_start_quotes.join("\"");
729 let o = format!(
732 "{0}{1}linkStartMarker:\"{2}",
733 pre_link, self.textile.uid, link_content);
734 output.push(Cow::Owned(o));
735 }
736
737
738 output.push(Cow::Borrowed(last_slice));
740 output.join("\":")
742 }
743
744 fn table<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
745 lazy_static! {
746 static ref PATTERN: Regex = fregex!(
747 &format!(
748 concat!(
749 r"(?ms)^(?:table(?P<tatts>_?{s}{a}{c})\.",
750 r"(?P<summary>.*?)\n)?^(?P<rows>{a}{c}\.? ?\|.*\|)",
751 r"[\s]*\n\n"),
752 s=*TABLE_SPAN_RE_S,
753 a=*ALIGN_RE_S,
754 c=*CLS_RE_S));
755 }
756 let text = format!("{0}\n\n", text);
757 match PATTERN.captures(&text) {
758 Ok(Some(cap)) => process_table(
759 self,
760 unwrap_or_empty(cap.name("tatts")),
761 &cap["rows"],
762 cap.name("summary").map(|m| m.as_str())).into(),
763 _ => text.into()
764 }
765 }
766
767 pub(crate) fn redcloth_list<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
770 lazy_static! {
771 static ref PATTERN: Regex = fregex!(
772 &format!(r"(?ms)^([-]+{0}[ .].*:=.*)$(?![^-])", *CLS_RE_S));
773 static ref SPLITTER: Regex = fregex!(
774 r"(?m)\n(?=[-])");
775
776 static ref ATTR_CONTENT_RE: Regex = fregex!(
778 &format!(r"(?ms)^[-]+({0})\.? (.*)$", *CLS_RE_S));
779 static ref XM_RE: Regex = fregex!(
781 &format!(r"(?s)^(.*?){0}*:=(.*?){0}*(=:|:=)?{0}*$",
782 SNIP_SPACE));
783 }
784
785 let f_rc_list = |cap: &Captures| -> String {
786 let mut out = Vec::<Cow<str>>::new();
787 for line in split_with_capture(&SPLITTER, &cap[0]) {
788 if let Ok(Some(m)) = ATTR_CONTENT_RE.captures(line) {
789 let atts = &m[1];
790 let content = m[2].trim();
791 let html_atts_str: String = BlockAttributes
792 ::parse(atts, None, true, self.textile.restricted)
793 .into();
794
795 let xm_capture = XM_RE.captures(content);
796 let (term, definition) = if let Ok(Some(ref xm)) = xm_capture {
797 (xm[1].trim(), xm[2].trim_matches(' '))
798 } else {
799 (content, "")
800 };
801
802 if out.is_empty() {
804 let dltag = if definition.is_empty() {
805 format!("<dl{0}>", html_atts_str).into()
806 } else {
807 "<dl>".into()
808 };
809 out.push(dltag);
810 }
811
812 if !term.is_empty() {
813 let newline_started_def = definition.starts_with('\n');
814 let mut definition = definition
815 .trim()
816 .replace('\n', self.textile.proper_br_tag());
817
818 if newline_started_def {
819 definition = format!("<p>{0}</p>", definition);
820 }
821 let term = term.replace('\n', self.textile.proper_br_tag());
822
823 let term = self.graf(&term);
824 let definition = self.graf(&definition);
825
826 out.push(format!("\t<dt{0}>{1}</dt>", html_atts_str, term).into());
827 if !definition.is_empty() {
828 out.push(format!("\t<dd>{0}</dd>", definition).into());
829 }
830 }
831
832 } else {
833 continue;
834 }
835 }
836 if !out.is_empty() {
837 out.push(Cow::Borrowed("</dl>"));
838 }
839 out.join("\n")
840 };
841
842 PATTERN.replace_all(text, f_rc_list)
843 }
844
845 pub(crate) fn textile_lists<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
846 lazy_static! {
847 static ref PATTERN: Regex = fregex!(
848 &format!(
849 concat!(r"(?ms)^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$",
850 r"(?![^#*;:])"),
851 *CLS_RE_S));
852 static ref SPLITTER: Regex = fregex!(r"(?m)\n(?=[*#;:])");
853 static ref LINE_PARSER: Regex = fregex!(
854 &format!(
855 concat!(
856 r"(?s)^(?P<tl>[#*;:]+)(?P<st>_|\d+)?(?P<atts>{0})[ .]",
857 r"(?P<content>.*)$"),
858 *CLS_RE_S));
859 }
860 struct ListItem<'t> {
861 atts: &'t str,
862 content: Cow<'t, str>,
863 level: usize,
864 tl: &'t str,
865 st: &'t str,
866 }
867
868 fn list_type(tl: &str) -> &'static str {
869 lazy_static! {
870 static ref START_RE: Regex = fregex!(r"^([#*]+)");
871 }
872 match START_RE.captures(tl) {
873 Ok(Some(m)) => if m[1].ends_with('#') { "ol" } else { "ul" },
874 _ => "dl"
875 }
876 }
877
878 let f_textile_list = |cap: &Captures| -> String {
879 let text = &cap[0];
880 let lines = split_with_capture(&SPLITTER, text);
881 let mut list_items = Vec::<ListItem>::new();
882 for line in lines {
883 if let Ok(Some(m)) = LINE_PARSER.captures(line) {
884 let tl = unwrap_or_empty(m.name("tl"));
886 list_items.push(
887 ListItem {
888 tl,
889 atts: unwrap_or_empty(m.name("atts")),
890 content: unwrap_or_empty(m.name("content")).into(),
891 level: tl.len(),
892 st: unwrap_or_empty(m.name("st")),
893 });
894 } else {
895 if let Some(last_item) = list_items.last_mut() {
897 last_item.content += "\n";
898 last_item.content += line;
899 }
900 }
901 }
902 if list_items.is_empty() || list_items[0].level > 1 {
903 return cap[0].to_owned();
904 }
905 let mut prev: Option<&ListItem> = None;
906
907 let mut lists = IndexMap::<&str, usize>::new();
908 let mut out = Vec::<String>::new();
909 let mut litem = "";
910 for (index, item) in list_items.iter().enumerate() {
911 let content = item.content.trim();
912 let ltype = list_type(item.tl);
913 litem = if item.tl.contains(';') {
914 "dt"
915 } else if item.tl.contains(':') {
916 "dd"
917 } else {
918 "li"
919 };
920 let next = list_items.get(index + 1);
921 let show_item = !content.is_empty();
922
923 let mut atts = BlockAttributes
924 ::parse(item.atts, None, true, self.textile.restricted)
925 .html_attrs();
926 if ltype == "ol" {
928 let start_value = self.ol_starts.entry(item.tl.to_string()).or_insert(1);
929 if prev.map(|p| item.level > p.level).unwrap_or(true) {
930 if item.st.is_empty() {
931 *start_value = 1;
932 } else if item.st != "_" {
933 if let Ok(int_st) = item.st.parse() {
934 *start_value = int_st;
935 }
936 }
937
938 if !item.st.is_empty() {
939 atts.insert("start", start_value.to_string());
940 }
941 }
942
943 if show_item {
944 *start_value += 1;
945 }
946 }
947
948 if let Some(p) = prev {
949 if p.tl.contains(';') && item.tl.contains(':') {
950 lists.insert(item.tl, 2);
951 }
952 }
953 let tabs = "\t".repeat(item.level - 1);
954 let mut line = if !lists.contains_key(item.tl) {
955 lists.insert(item.tl, 1);
956 if show_item {
957 format!(
958 "{0}<{1}{2}>\n{0}\t<{3}>{4}",
959 tabs, ltype, atts.to_string(),
960 litem, content)
961 } else {
962 format!(
963 "{0}<{1}{2}>",
964 tabs, ltype, atts.to_string())
965 }
966 } else if show_item {
967 format!(
968 "{0}\t<{1}{2}>{3}",
969 tabs, litem, atts.to_string(), content)
970 } else {
971 String::new()
972 };
973
974 if show_item && next.map(|n| n.level <= item.level).unwrap_or(true) {
975 line += &format!("</{0}>", litem);
976 }
977
978 for (k, v) in lists.clone().iter().rev() {
979 let indent = k.len();
980 if next.map(|n| indent > n.level).unwrap_or(true) {
981 if *v != 2 {
982 line += &format!("\n{0}</{1}>", tabs, list_type(k));
983 if indent > 1 {
984 line += "</";
985 line += litem;
986 line += ">";
987 }
988 }
989 lists.shift_remove(k);
990 }
991 }
992 prev = Some(item);
993 out.push(line);
994 }
995 let merged_out = out.join("\n");
996 self.do_tag_br(litem, &merged_out).into_owned()
997 };
998
999 PATTERN.replace_all(text, f_textile_list)
1000 }
1001
1002 pub(crate) fn do_tag_br<'a>(&mut self, tag: &'static str, input: &'a str) -> Cow<'a, str> {
1005
1006 fn eq_ignore_ascii_case(a: &str, b: &str) -> bool {
1007 if a.len() == b.len() {
1008 a.chars().zip(b.chars()).all(|(a_c, b_c)| a_c.eq_ignore_ascii_case(&b_c))
1009 } else {
1010 false
1011 }
1012 }
1013 fn insert_brs<'c>(text: &'c str, br: &str) -> Cow<'c, str> {
1021 let num_newlines = text.match_indices('\n').count();
1022 if num_newlines == 0 {
1023 return text.into()
1024 }
1025 const STOP_PREFIXES: [&str; 6] = ["</dd>", "</dt>", "</li>", "<br/>", "<br>", "<br />"];
1027 let mut output = String::with_capacity(text.len() + num_newlines * br.len());
1028 let lc_text = text.to_lowercase();
1029 let mut next_start = 0;
1030 while let Some(rel_newline_pos) = lc_text[next_start..].find('\n') {
1031 let abs_newline_pos = next_start + rel_newline_pos;
1032 output += &text[next_start..abs_newline_pos];
1033 let is_next_good = !lc_text[abs_newline_pos + 1..]
1035 .starts_with(|c| char::is_whitespace(c) || c == '|');
1036 if is_next_good {
1037 let is_prefix_good = !STOP_PREFIXES.iter().any(|p| {
1039 let prefix_start = abs_newline_pos - p.len().min(abs_newline_pos);
1040 let prefix = &lc_text[prefix_start..abs_newline_pos];
1041 eq_ignore_ascii_case(prefix, *p)
1042 });
1043 if is_prefix_good {
1044 output += br;
1045 }
1046 }
1047 output.push('\n');
1048 next_start = abs_newline_pos + 1;
1049 }
1050 if output.is_empty() {
1051 text.into()
1052 } else {
1053 output += &text[next_start..];
1054 output.into()
1055 }
1056 }
1057
1058 let mut regex_cache = self.textile.regex_cache.borrow_mut();
1059 let pattern = regex_cache
1060 .entry(line!())
1061 .or_default()
1062 .entry(tag)
1063 .or_insert_with(
1064 || fregex!(
1065 &format!(r"(?s)<{0}([^>]*?)>(.*)</{0}>",
1066 fancy_regex::escape(tag))));
1067
1068 let br_tag = self.textile.proper_br_tag();
1069 pattern.replace_all(input, |cap: &Captures| -> String {
1070 let content = insert_brs(&cap[2], br_tag);
1071 format!("<{0}{1}>{2}</{0}>", tag, &cap[1], content)
1072 })
1073 }
1074
1075 fn do_p_br<'a>(&mut self, input: &'a str) -> Cow<'a, str> {
1076 lazy_static! {
1077 static ref TAG_RE: Regex = fregex!(r"(?s)<(p|h[1-6])([^>]*?)>(.*)(</\1>)");
1078 static ref BR_RE: Regex = fregex!(
1079 &format!(r"(?i)<br[ ]*/?>{0}*\n(?![{0}|])", SNIP_SPACE));
1080 static ref NEWLINE_RE: Regex = fregex!(r"\n(?![\s|])");
1081 }
1082
1083 let f_do_p_br = |cap: &Captures| -> String {
1084 let text = &cap[3];
1085 let text = BR_RE.replace_all(text, "\n");
1086 let text = NEWLINE_RE.replace_all(
1087 &text,
1088 self.textile.proper_br_tag());
1089 format!("<{0}{1}>{2}{3}", &cap[1], &cap[2], text, &cap[4])
1090 };
1091 TAG_RE.replace_all(input, f_do_p_br)
1092 }
1093
1094
1095 fn footnote_ref<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1096 lazy_static! {
1097 static ref PATTERN: Regex = fregex!(
1098 &format!(
1099 r"(?<=\S)\[(?P<id>{0}+)(?P<nolink>!?)\](?P<space>{1}?)",
1100 SNIP_DIGIT,
1101 SNIP_SPACE));
1102 }
1103
1104 let f_footnote_id = |cap: &Captures| -> String {
1105 let mut fn_att = Vec::<(String, String)>::new();
1106 fn_att.push(("class".to_owned(), "footnote".to_owned()));
1107
1108 let match_id = &cap["id"];
1109 if !self.footnotes.contains_key(match_id) {
1110 let new_index = self.increment_link_index();
1111 let fn_id = format!("{0}{1}", self.textile.link_prefix, new_index);
1112 fn_att.push(("id".to_owned(), format!("fnrev{0}", &fn_id)));
1113 self.footnotes.insert(match_id.to_owned(), fn_id);
1114 }
1115 let fn_id = &self.footnotes[match_id];
1116 let link_tag = generate_tag(
1117 "a",
1118 Some(match_id),
1119 &[("href".to_owned(), format!("#fn{0}", fn_id))]);
1120 let sup_tag = match cap.name("nolink") {
1121 Some(m) if m.as_str() == "!" => {
1122 generate_tag("sup", Some(match_id), &fn_att)
1123 },
1124 _ => generate_tag("sup", Some(&link_tag), &fn_att)
1125 };
1126 format!("{0}{1}", sup_tag, &cap["space"])
1127 };
1128
1129 PATTERN.replace_all(text, f_footnote_id)
1130 }
1131
1132 fn note_ref<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1134 lazy_static! {
1135 static ref TEXT_RE: Regex = fregex!(
1136 &format!(
1137 concat!(
1138 r"\[", r"({0})", r"\#",
1141 r"([^\]!]+)", r"([!]?)", r"\]"),
1144 *CLS_RE_S));
1145 }
1146
1147 let f_parse_note_refs = |cap: &Captures| -> String {
1152 let (atts, label, nolink) = (&cap[1], &cap[2], &cap[3]);
1153 let html_atts = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1154
1155 let num = if let Some(NoteInfo{seq: Some(num), ..}) = self.notes.get(label) {
1157 num.clone()
1158 } else {
1159 let num = self.note_index.to_string();
1160 self.notes.insert(
1161 label.to_string(),
1162 NoteInfo {
1163 seq: Some(num.clone()),
1164 id: "".to_owned(),
1165 refids: Default::default(),
1166 attrs: None,
1167 content: None,
1168 link: None,
1169 });
1170 self.note_index += 1;
1171 num
1172 };
1173
1174 let new_index = self.increment_link_index();
1177 let refid = format!("{0}{1}", self.textile.link_prefix, new_index);
1178 let is_note_id_empty = self.notes[label].id.is_empty();
1179 let new_id: Cow<str> = if is_note_id_empty {
1180 let new_index = self.increment_link_index();
1181 format!("{0}{1}", self.textile.link_prefix, new_index).into()
1182 } else {
1183 "".into()
1184 };
1185 let mut result = format!("<span id=\"noteref{0}\">{1}</span>", &refid, num);
1187 if nolink != "!" {
1188 result = format!("<a href=\"#note{0}\">{1}</a>", &new_id, result);
1189 }
1190 self.notes.entry(label.to_owned()).and_modify(|note_ref| {
1191 note_ref.refids.push(refid);
1192 if is_note_id_empty {
1193 note_ref.id.replace_range(.., &new_id);
1194 }
1195 });
1196 generate_tag("sup", Some(&result), &html_atts)
1198 };
1199 TEXT_RE.replace_all(text, f_parse_note_refs)
1200 }
1201
1202
1203
1204 fn glyphs<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1220 lazy_static! {
1221 static ref HTML5_GLYPH_REPLACERS: [(Regex, &'static str); 22] = make_glyph_replacers(true);
1222 static ref XHTML_GLYPH_REPLACERS: [(Regex, &'static str); 22] = make_glyph_replacers(false);
1223 static ref SPLITTER_RE: Regex = fregex!(r"(<[\w\/!?].*?>)");
1224 }
1225
1226 let text = text.trim_end_matches('\n');
1227 let mut result = Vec::new();
1228
1229 let replacers = match self.textile.html_type {
1230 HtmlKind::HTML5 => &HTML5_GLYPH_REPLACERS[..],
1231 HtmlKind::XHTML => &XHTML_GLYPH_REPLACERS[..],
1232 };
1233 for (i, raw_line) in split_with_capture(&SPLITTER_RE, text).enumerate() {
1235 result.push(
1236 if i % 2 == 0 {
1237 let raw_line = if !self.textile.restricted {
1238 Cow::Owned(
1239 LONE_AMP_RE.replace_all(raw_line, "&")
1240 .replace('<', "<")
1241 .replace('>', ">"))
1242 } else {
1243 Cow::Borrowed(raw_line)
1244 };
1245 multi_replace(
1246 raw_line,
1247 replacers
1248 .iter()
1249 .map(|item| (&item.0, item.1))
1250 .chain(self.textile.dyn_glyph_replacers.iter()
1251 .map(|item| (&item.0, item.1.as_str())))
1252 ).into()
1253 } else {
1254 Cow::Borrowed(raw_line)
1255 });
1256 }
1257 result.join("").into()
1258 }
1259
1260 fn replace_links<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1261 const STOPCHARS:&str = r#"\s|^'"*"#;
1263 let mut regex_cache = self.textile.regex_cache.borrow_mut();
1264 let needle = format!("{0}linkStartMarker:", self.textile.uid);
1265 let pattern = regex_cache
1266 .entry(line!())
1267 .or_default()
1268 .entry("")
1269 .or_insert_with(
1270 || fregex!(
1271 &format!(
1272 concat!(
1273 r"(?P<pre>\[)?",
1275 "{0}\"",
1277 r"(?P<inner>(?:.|\n)*?)",
1280 "\":",
1282 r"(?P<urlx>[^{1}]*)"),
1284 needle, STOPCHARS)));
1285
1286 let mut f_link = |cap: &Captures| -> String {
1287 let in_ = &cap[0];
1288 let mut pre = unwrap_or_empty(cap.get(1)).to_owned();
1289 let inner = cap[2].replace('\n', self.textile.proper_br_tag());
1290 let mut url = &cap[3];
1291 if inner.is_empty() {
1292 return format!(r#"{0}"{1}":{2}"#, pre, inner, url);
1293 }
1294 lazy_static! {
1295 static ref BLOCK_RE: Regex = fregex!(
1296 &format!(
1297 concat!(
1298 r"^",
1299 r"(?P<atts>{0})", r"{1}*", r"(?P<text>", r"(!.+!)", r"|", r".+?", r")", r"(?:\((?P<title>[^)]+?)\))?", r"$"),
1308 *CLS_RE_S, SNIP_SPACE));
1309 }
1310
1311 let (atts, text, title) = if let Ok(Some(m)) = BLOCK_RE.captures(&inner) {
1312 let m_text = unwrap_or_empty(m.name("text"));
1313 (unwrap_or_empty(m.name("atts")),
1314 if m_text.is_empty() { inner.as_str() } else { m_text },
1315 unwrap_or_empty(m.name("title")))
1316 } else {
1317 ("", inner.as_str(), "")
1318 };
1319 let mut pop = String::new();
1320 let mut tight = String::new();
1321 let csb_count: usize = url.matches(']').count();
1322 let mut counts = CharCounter::new(['[', ']', '(', ')']);
1323 counts[']'] = Some(csb_count);
1324 if csb_count > 0 {
1333 lazy_static! {
1334 static ref URL_RE: Regex = fregex!(r"(?P<url>^.*\])(?P<tight>\[.*?)$");
1335 }
1336
1337 if let Ok(Some(m)) = URL_RE.captures(url) {
1338 url = unwrap_or_empty(m.get(1));
1339 tight.replace_range(.., &m[2]);
1340 }
1341 }
1342 if csb_count > 0 {
1348 lazy_static! {
1349 static ref URL_RE: Regex = fregex!(r"(?P<url>^.*\])(?!=)(?P<end>.*?)$");
1350 }
1351 if let Ok(Some(m)) = URL_RE.captures(url) {
1352 url = unwrap_or_empty(m.name("url"));
1353 tight = format!("{0}{1}", &m["end"], tight);
1354 }
1355 }
1356
1357 let mut first = true;
1361 let mut url_chars: Vec<_> = url.chars().collect();
1362
1363 loop {
1364 let mut popped = false;
1365 if let Some(c) = url_chars.pop() {
1366 match c {
1367 '!' | '?' | ':' | ';' | '.' | ',' => {
1368 pop.insert(0, c);
1371 popped = true;
1372 },
1373 '>' => {
1374 let url_left: String = url_chars.iter().collect();
1375
1376 lazy_static! {
1377 static ref RE: Regex = fregex!(r"^(?P<url_chars>.*)(?P<tag></[a-z]+)$");
1378 }
1379 if let Ok(Some(m)) = RE.captures(&url_left) {
1380 url_chars.splice(.., m["url_chars"].chars());
1381 pop = format!("{0}{1}{2}", &m["tag"], c, pop);
1382 popped = true;
1383 }
1384 },
1385 ']' => {
1386 if counts['['].is_none() {
1391 counts['['] = Some(url.matches('[').count());
1392 }
1393 if counts['['] == counts[']'] {
1394 url_chars.push(c)
1396 } else {
1397 popped = true;
1399 counts.dec(']');
1400 if first {
1401 pre.clear();
1402 }
1403 }
1404 },
1405 ')' => {
1406 if counts[')'].is_none() {
1407 counts['('] = Some(url.matches('(').count());
1408 counts[')'] = Some(url.matches(')').count());
1409 }
1410
1411 if counts['('] == counts[')'] {
1412 url_chars.push(c);
1413 } else {
1414 pop.insert(0, c);
1416 counts.dec(')');
1417 popped = true;
1418 }
1419 },
1420 _ => {
1421 url_chars.push(c);
1422 }
1423 }
1424 }
1425
1426 first = false;
1427 if !popped {
1428 break;
1429 }
1430 }
1431
1432 let url: String = url_chars.iter().collect();
1433
1434 let url = self.unrestrict_url(&url);
1435 let uri_parts = UrlBits::parse(&url);
1436 let allowed_schemes = if self.textile.restricted {
1437 &RESTRICTED_URL_SCHEMES[..]
1438 } else {
1439 &UNRESTRICTED_URL_SCHEMES[..]
1440 };
1441 let scheme_in_list = allowed_schemes.contains(&(uri_parts.scheme()));
1442 let is_valid_url = uri_parts.scheme().is_empty() || scheme_in_list;
1443 if !is_valid_url {
1444 return in_.replace(&format!("{0}linkStartMarker:", self.textile.uid), "");
1445 }
1446
1447 let text: Cow<str> = if text == "$" {
1448 if scheme_in_list {
1449 make_url_readable(&url).into()
1450 } else if let Some(rurl) = self.urlrefs.get(url.as_ref()) {
1451 encode_html(make_url_readable(rurl.source()), true, true).into()
1452 } else {
1453 url
1454 }
1455 } else {
1456 text.into()
1457 };
1458
1459 let text = text.trim();
1460 let title = encode_html(title, false, false);
1461
1462 let text = if !self.textile.noimage {
1463 self.image(text)
1464 } else {
1465 Cow::Borrowed(text)
1466 };
1467 let text = self.span(&text);
1468 let text = self.glyphs(&text);
1469
1470
1471 let normalized_url = uri_parts.to_string();
1472 let url_id = self.shelve_url(
1473 UrlString::Normalized(normalized_url.into()));
1474 let mut attributes = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1475 attributes.insert("href", url_id);
1476 if !title.is_empty() {
1477 attributes.insert("title", self.shelve(title));
1478 }
1479 if let Some(ref rel) = self.textile.rel {
1480 attributes.insert("rel", rel.clone());
1481 }
1482 let a_text = generate_tag("a", Some(&text), &attributes);
1483 let a_shelf_id = self.shelve(a_text);
1484 let result = format!("{0}{1}{2}{3}", pre, a_shelf_id, pop, tight);
1485 result
1486 };
1487
1488
1489 let mut prev_text = Cow::Borrowed(text);
1490 let mut abort = false;
1491 while !abort && prev_text.contains(&needle) {
1492 let new_text = pattern.replace_all(&prev_text, &mut f_link);
1493 if new_text == prev_text {
1494 abort = true;
1495 }
1496 prev_text = new_text.into_owned().into()
1497 }
1498 prev_text
1499 }
1500
1501 fn is_valid_url(&self, url: &str) -> bool {
1502 let uri_parts = UrlBits::parse(url);
1503 if uri_parts.scheme().is_empty() {
1504 true
1505 } else {
1506 let allowed_schemes = if self.textile.restricted {
1507 &RESTRICTED_URL_SCHEMES[..]
1508 } else {
1509 &UNRESTRICTED_URL_SCHEMES[..]
1510 };
1511 allowed_schemes.contains(&(uri_parts.scheme()))
1512 }
1513 }
1514
1515 pub fn graf<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1516 let lite = self.textile.lite;
1517 let text = Cow::Borrowed(text);
1518 let text = if !lite {self.no_textile(&text).into()} else {text};
1519 let text = if !lite {self.code(&text).into()} else {text};
1520 let text = self.get_html_comments(&text);
1521 let text = self.get_refs(&text);
1522 let ltext = self.glyph_quoted_quote(&text);
1523 let text = self.links(<ext);
1524 let text = if !self.textile.noimage {self.image(&text)} else {text.into()};
1525 let text = if !lite {self.table(&text)} else {text};
1526 let text = if !lite {self.redcloth_list(&text)} else {text};
1527 let text = if !lite { self.textile_lists(&text)} else {text };
1528 let text = self.span(&text);
1529 let text = self.footnote_ref(&text);
1530 let text = self.note_ref(&text);
1531 let text = self.glyphs(&text);
1532 Cow::Owned(text.trim_end_matches('\n').to_owned())
1533 }
1534
1535 fn span<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1536 lazy_static! {
1537 static ref TAG_PATTERNS: [Regex; 10] = [
1538 span_re(r"\*\*"), span_re(r"\*"), span_re(r"\?\?"),
1539 span_re(r"\-"), span_re(r"__"), span_re(r"_"), span_re(r"%"),
1540 span_re(r"\+"), span_re(r"~"), span_re(r"\^")
1541 ];
1542 }
1543 self.span_depth += 1;
1544 let can_replace = self.span_depth <= self.textile.max_span_depth;
1545
1546 let f_span = |cap: &Captures| -> String {
1547 let tag = match &cap[2] {
1549 "*" => "strong",
1550 "**" =>"b",
1551 "??" =>"cite",
1552 "_" => "em",
1553 "__" =>"i",
1554 "-" => "del",
1555 "%" => "span",
1556 "+" => "ins",
1557 "~" => "sub",
1558 "^" => "sup",
1559 _ => unreachable!("Not allowed by the regex")
1560 };
1561 let atts = &cap[3];
1562 let mut html_atts = BlockAttributes::parse(atts, None, true, self.textile.restricted).html_attrs();
1563 if let Some(cite) = cap.get(4) {
1564 html_atts.insert("cite", cite.as_str().trim().to_owned());
1565 }
1566 let content = &cap[5];
1567 let content = self.span(content);
1568 let end = &cap[6];
1569 let (pre, tail) = get_special_options(
1570 unwrap_or_empty(cap.get(1)),
1571 unwrap_or_empty(cap.get(7)));
1572 let mut open_tag = String::from("<") + tag;
1573 join_html_attributes(&mut open_tag, &html_atts);
1574 open_tag.push('>');
1575 let close_tag = format!("</{}>", tag);
1576 let (open_tag_id, close_tag_id) = self.store_tags(open_tag, close_tag);
1577 String::from(pre) + &open_tag_id + &content + end + &close_tag_id + tail
1578 };
1579
1580 let mut text = Cow::Borrowed(text);
1581 if can_replace {
1582 text = Cow::Owned(multi_replace_with_one(text, TAG_PATTERNS.iter(), f_span));
1583 }
1584 self.span_depth -= 1;
1585 text
1586 }
1587
1588 fn store_tags(&mut self, open_tag: String, close_tag: String) -> (String, String) {
1589 self.ref_index += 1;
1590 self.ref_cache.insert(self.ref_index, open_tag);
1591 let open_tag_id = format!("{0}{1}:ospan ", self.textile.uid, self.ref_index);
1592
1593 self.ref_index += 1;
1594 self.ref_cache.insert(self.ref_index, close_tag);
1595 let close_tag_id = format!(" {0}{1}:cspan", self.textile.uid, self.ref_index);
1596 (open_tag_id, close_tag_id)
1597 }
1598
1599 fn retrieve_tags(&self, text: &str) -> String {
1600 let f_retrieve_tags = |cap: &Captures| -> String {
1601 let tag_id = cap[1].parse::<u32>().expect("must be an integer");
1602 self.ref_cache.get(&tag_id).cloned().unwrap_or_default()
1603 };
1604 let result = {
1605 let mut regex_cache = self.textile.regex_cache.borrow_mut();
1606 let open_tag_re: &Regex =
1607 regex_cache
1608 .entry(line!())
1609 .or_default()
1610 .entry("")
1611 .or_insert_with(
1612 || fregex!(&format!("{0}(?P<token>[0-9]+):ospan ", self.textile.uid)));
1613 open_tag_re.replace_all(text, f_retrieve_tags)
1614 };
1615 let result = {
1616 let mut regex_cache = self.textile.regex_cache.borrow_mut();
1617 let close_tag_re: &Regex =
1618 regex_cache
1619 .entry(line!())
1620 .or_default()
1621 .entry("")
1622 .or_insert_with(
1623 || fregex!(&format!(" {0}(?P<token>[0-9]+):cspan", self.textile.uid)));
1624 close_tag_re.replace_all(&result, f_retrieve_tags)
1625 };
1626 result.into_owned()
1627 }
1628
/// Performs the block level processing of `text` and returns the HTML.
///
/// The text is split on runs of two or more newlines. Each non-blank piece
/// is either a tagged block (`tag[atts].[.][:cite] content`) or an anonymous
/// block. A tag followed by two dots starts an "extended" block: the
/// following anonymous blocks are appended to it until the next tagged
/// block (or the end of the text) closes it.
pub fn block<'b>(&mut self, text: &'b str) -> String {
    // Builds the regex recognising a block signature for the given tag set.
    fn textile_block_re(block_tags_pattern: &str) -> Regex {
        fregex!(
            &format!(
                concat!(r"(?s)^(?P<tag>{0})(?P<atts>{1}{2}{1})\.(?P<ext>\.?)",
                        r"(?::(?P<cite>\S+))? (?P<graf>.*)$"),
                block_tags_pattern, *ALIGN_RE_S, *CLS_RE_S))
    }
    lazy_static! {
        static ref TEXTILE_TAG_RE: Regex = textile_block_re(
            BLOCK_TAGS_RE_S);
        static ref TEXTILE_LIGHT_TAG_RE: Regex = textile_block_re(
            BLOCK_TAGS_LITE_RE_S);
        static ref MULTI_ENDLINE_RE: Regex = fregex!(r"(\n{2,})");
        static ref BR_TAG_RE: Regex = fregex!(r"(?i)<br\s*?/?>");
    }
    let mut out = Vec::<Cow<'b, str>>::new();
    // Lite mode only knows a reduced set of block tags.
    let tag_pattern: &Regex = if self.textile.lite {
        &TEXTILE_LIGHT_TAG_RE
    } else {
        &TEXTILE_TAG_RE
    };
    // Blank separators seen since the last emitted block.
    let mut whitespace = String::new();
    let mut eat_whitespace = false;
    // State of the current block; `ext` is non-empty while an extended
    // block is open.
    let mut ext = "";
    let mut tag = "";
    let mut atts = "";
    let mut cite = None;
    // Closing markup of the open extended block, appended once it ends.
    let mut last_outer_closing = String::new();
    let mut eat = false;
    let textblocks = split_with_capture(&MULTI_ENDLINE_RE, text);
    for block in textblocks {
        // Blank pieces (the separators) are only remembered, unless the
        // previous block was eaten.
        if block.trim().is_empty() {
            if !eat_whitespace {
                whitespace += block;
            }
            continue;
        }

        // Outside of an extended block every block starts as a paragraph.
        if ext.is_empty() {
            tag = "p";
            atts = "";
            cite = None;
            eat = false;
        }

        eat_whitespace = false;
        let mut is_anonymous_block = true;
        let block_output = if let Ok(Some(m)) = tag_pattern.captures(block) {
            is_anonymous_block = false;
            // A new tagged block closes a still open extended block.
            if !ext.is_empty() {
                if let Some(last_out) = out.last_mut() {
                    last_out.to_mut().push_str(&last_outer_closing);
                }
            }
            tag = unwrap_or_empty(m.get(1));
            atts = unwrap_or_empty(m.get(2));
            ext = unwrap_or_empty(m.get(3));
            cite = m.get(4).as_ref().map(Match::as_str);
            let content = unwrap_or_empty(m.get(5));
            let bdata = Block::new(tag, atts, cite, content, self);
            eat = bdata.eat;
            last_outer_closing.replace_range(.., &bdata.outer_closing);

            // The outer closing is postponed while the block is extended.
            bdata.outer_opening
                + &bdata.inner_opening
                + &bdata.content
                + &bdata.inner_closing
                + if ext.is_empty() { &bdata.outer_closing } else { "" }
        } else {
            let raw_block = DIVIDER_RE.is_match(block).unwrap_or_default();
            if !ext.is_empty() || (!block.starts_with(' ') && !raw_block) {
                // Anonymous block: continues the extended block or becomes a
                // paragraph of its own.
                let bdata = Block::new(tag, atts, cite, block, self);
                eat = bdata.eat;
                last_outer_closing.replace_range(.., &bdata.outer_closing);
                if bdata.content.is_empty() || (tag == "p" && !has_raw_text(&bdata.content)) {
                    bdata.content
                } else {
                    bdata.inner_opening + &bdata.content + &bdata.inner_closing
                }
            } else if raw_block && self.textile.restricted {
                self.shelve(encode_html(block, self.textile.restricted, false))
            } else if raw_block {
                self.shelve(block.to_owned())
            } else {
                // Block starting with a space: inline processing only.
                self.graf(block).into_owned()
            }
        };
        let block_output = self.do_p_br(&block_output);
        // Normalise every `<br>` variant to the configured flavour and put
        // the pending whitespace in front of the block.
        let block_output = whitespace.clone() + &BR_TAG_RE
            .replace_all(
                &block_output,
                self.textile.proper_br_tag());

        if !ext.is_empty() && is_anonymous_block {
            // Continuation of an extended block: append to the last output.
            if let Some(last_out) = out.last_mut() {
                last_out.to_mut().push_str(&block_output);
            }
        } else if !eat {
            out.push(block_output.into());
        }

        if eat {
            eat_whitespace = true;
        } else {
            whitespace.clear();
        }
    }
    // Close an extended block that is still open at the end of the text.
    if !ext.is_empty() {
        if let Some(last_output) = out.last_mut() {
            *last_output += last_outer_closing.as_str();
        }
    }
    out.join("")
}
1746
1747 fn glyph_quoted_quote<'a>(&mut self, text: &'a str) -> Cow<'a, str> {
1748 const QUOTE_STARTS: &str = "\"'({[«»‹›„‚‘”";
1749 lazy_static! {
1750 static ref PATTERN_RE: Regex = fregex!(
1751 &format!(" (?P<pre>[{}])(?P<quoted>\"?|\"[^\"]+)(?P<post>.) ",
1752 fancy_regex::escape(QUOTE_STARTS)));
1753 }
1754
1755 fn matching_quote(quote: char) -> Option<char> {
1756 match quote {
1757 '"' => Some('"'),
1758 '\'' => Some('\''),
1759 '(' => Some(')'),
1760 '{' => Some('}'),
1761 '[' => Some(']'),
1762 '«' => Some('»'),
1763 '»' => Some('«'),
1764 '‹' => Some('›'),
1765 '›' => Some('‹'),
1766 '„' => Some('“'),
1767 '‚' => Some('‘'),
1768 '‘' => Some('’'),
1769 '”' => Some('“'),
1770 _ => None
1771 }
1772 }
1773
1774 let f_glyph_quoted_quote = |m: &Captures| -> String {
1775 let mut pre_char_buf = [0u8; 4];
1777 let mut post_char_buf = [0u8; 4];
1778 if let Some(pre_char) = m["pre"].chars().next() {
1779 if let Some(post_char) = m["post"].chars().next() {
1780 if Some(post_char) != matching_quote(pre_char) {
1781 return m[0].to_owned();
1782 }
1783 let new_pre = match pre_char {
1784 '"' => "“",
1785 '\'' => "‘",
1786 ' ' => " ",
1787 x => x.encode_utf8(&mut pre_char_buf)
1789 };
1790 let new_post = match post_char {
1791 '"' => "”",
1792 '\'' => "’",
1793 ' ' => " ",
1794 x => x.encode_utf8(&mut post_char_buf)
1795 };
1796 let found = &m["quoted"];
1797 let found: Cow<str> = if found.len() > 1 {
1798 self.glyphs(found).trim_end().to_owned().into()
1799 } else if found == "\"" {
1800 """.into()
1801 } else {
1802 found.into()
1803 };
1804 return self.shelve(format!(" {new_pre}{found}{new_post} "))
1805 }
1806 }
1807 unreachable!("Should be reached, check regular expression");
1808 };
1809 PATTERN_RE.replace_all(text, f_glyph_quoted_quote)
1810 }
1811
1812}
1813
1814
/// Flavour of the markup produced by the parser.
pub enum HtmlKind {
    /// XHTML output; line breaks are written as `<br />`.
    XHTML,
    /// HTML5 output; line breaks are written as `<br>`.
    HTML5
}
1821
/// Callback that customises the HTML sanitizer: it receives a mutable
/// `ammonia::Builder` and returns a reference to the builder to use.
type AmmoniaConfigurator = dyn for <'a, 'b>
    Fn(&'a mut crate::ammonia::Builder<'b>) -> &'a crate::ammonia::Builder<'b>;
1824
/// Configuration of the Textile parser; the entry point is `Textile::parse`.
pub struct Textile {
    // Marker prefix (`textileRef:{base_id}:`) used to build the placeholder
    // tokens inserted into the text during parsing.
    uid: String,
    // Prefix (`{base_id}-`) of the ids generated for links and notes.
    pub(crate) link_prefix: String,
    // Restricted mode: the input is HTML-encoded before parsing and only
    // the schemes of `RESTRICTED_URL_SCHEMES` are accepted in links.
    pub(crate) restricted: bool,
    // Set through `set_raw_blocks`; consumed outside of this section.
    pub(crate) raw_block_enabled: bool,
    // Set through `set_align_class`; `None` until configured.
    pub(crate) align_class_enabled: Option<bool>,
    // When false, `parse` skips the block level processing.
    block_tags: bool,
    // Lite mode: reduced block tag set and fewer inline processing steps.
    pub(crate) lite: bool,
    // When true, image markup is not processed.
    noimage: bool,
    // Set through `set_getting_image_size`; consumed outside of this section.
    get_sizes: bool,
    // Maximum nesting depth of inline spans.
    max_span_depth: u32,
    // Output flavour (selects the glyph replacers and the `<br>` form).
    html_type: HtmlKind,
    // Optional value of the `rel` attribute added to generated links.
    rel: Option<String>,
    // Cache of the regexes that embed `uid`, keyed by the source line of the
    // call site and by an additional string key.
    regex_cache: std::cell::RefCell<HashMap<u32, HashMap<&'static str, Regex>>>,
    // Glyph replacers whose replacement text depends on `uid`.
    dyn_glyph_replacers: [(Regex, String); 1],
    // Sanitizer configuration; `None` disables sanitizing in `parse`.
    sanitizer_config: Option<Box<AmmoniaConfigurator>>,
}
1853
1854fn normalize_newlines(text: &str) -> String {
1855 lazy_static! {
1856 static ref CHANGES: [(Regex, &'static str); 2] = [
1857 (fregex!(r"\r\n?"), "\n"),
1858 (fregex!(r"(?m)^[ \t]*\n"), "\n"),
1859 ];
1860 }
1861 multi_replace(text.into(), CHANGES.iter().map(|i| (&i.0, i.1)))
1862 .trim_matches('\n')
1863 .into()
1864}
1865
/// Builds a short hexadecimal identifier from the current time.
///
/// The nanoseconds elapsed since the Unix epoch are hashed and the 64-bit
/// hash is formatted as lowercase hexadecimal. If the system clock is set
/// before the epoch, the distance to the epoch is used instead of panicking.
fn time_based_uid() -> String {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_else(|err| err.duration());
    let mut hasher = DefaultHasher::new();
    hasher.write_u128(since_epoch.as_nanos());
    format!("{:x}", hasher.finish())
}
1872
// Character class body matching lowercase letters (Unicode category `Ll`).
const SNIP_NAB: &str = r"\p{Ll}";
lazy_static! {
    // Matches a run of three or more characters of the `SNIP_ABR` class:
    //   group 1: the preceding space, start of text or one of `>(;-`,
    //   group 2: the run itself,
    //   group 3: lowercase letters directly following the run.
    // The run has to be followed by a space, punctuation, `<` or the end of
    // the text; the second lookahead requires that no `"` or `>` occurs
    // before the next `<` (or the end of the text).
    static ref DYN_3PLUS_RE: Regex = fregex!(
        &format!(
            concat!(
                r#"({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)"#,
                r#"(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))"#),
            space=SNIP_SPACE,
            abr=SNIP_ABR,
            nab=SNIP_NAB,
            pnct=PNCT_RE_S));
}
1885
1886impl Default for Textile {
1887 fn default() -> Self {
1888 let result = Textile {
1889 link_prefix: String::new(), uid: String::new(), restricted: false,
1892 raw_block_enabled: false,
1893 align_class_enabled: None,
1894 block_tags: true,
1895 lite: false,
1896 noimage: false,
1897 get_sizes: false,
1898 max_span_depth: 5,
1899 html_type: HtmlKind::HTML5,
1900 rel: None,
1901 sanitizer_config: None,
1902 regex_cache: std::cell::RefCell::new(Default::default()),
1903 dyn_glyph_replacers: [
1904 (DYN_3PLUS_RE.clone(), String::new()),
1907 ]
1908 };
1909 result.set_uid(&time_based_uid())
1910 }
1911}
1912
1913impl Textile {
1914
1915 pub fn parse(&self, text: &str) -> String {
1919
1920 if text.trim().is_empty() {
1921 return text.to_owned();
1922 }
1923
1924 let text = if self.restricted {
1925 Cow::Owned(encode_html(text, false, false))
1926 } else {
1927 Cow::Borrowed(text)
1928 };
1929
1930 let mut state = ParserState::new(self);
1931 let text = normalize_newlines(&text)
1932 .replace(&state.textile.uid, "");
1933
1934 let text = if self.block_tags {
1935 let text = state.block(&text);
1936 state.place_note_lists(&text).into_owned()
1937 } else {
1938 let text = text + "\n\n";
1939 let text = state.glyph_quoted_quote(&text);
1941 let text = state.span(&text);
1943 state.glyphs(&text).into_owned()
1946 };
1947
1948 let text = state.retrieve(text);
1949 let text = text.replace(
1950 &format!("{0}:glyph:", &state.textile.uid),
1951 "");
1952
1953 let text = state.retrieve_tags(&text);
1954 let text = state.retrieve_urls(&text);
1955
1956 let text = match self.sanitizer_config {
1957 Some(ref configurator) =>
1958 configurator(
1959 crate::ammonia::Builder::default().link_rel(None)
1960 )
1961 .clean(&text)
1962 .to_string()
1963 .into(),
1964 None => text,
1965 };
1966
1967 lazy_static! {
1970 static ref BR_PATTERN: Regex = fregex!(r"<br( /)?>(?!\n)");
1971 }
1972
1973 let text = BR_PATTERN.replace_all(
1974 &text,
1975 match self.html_type {
1976 HtmlKind::XHTML => "<br />\n",
1977 HtmlKind::HTML5 => "<br>\n",
1978 });
1979
1980 let text = text.trim_end_matches('\n');
1981
1982 text.to_string()
1983 }
1984
1985 pub fn set_getting_image_size(mut self, value: bool) -> Self {
1991 self.get_sizes = value;
1992 self
1993 }
1994
1995 pub fn set_block_tags(mut self, value: bool) -> Self {
1998 self.block_tags = value;
1999 self
2000 }
2001
2002 pub fn set_html_kind(mut self, html_type: HtmlKind) -> Self {
2009 self.html_type = html_type;
2010 self
2011 }
2012
2013 pub fn set_restricted(mut self, value: bool) -> Self {
2023 self.restricted = value;
2024 self
2025 }
2026
2027 pub fn set_lite(mut self, value: bool) -> Self {
2034 self.lite = value;
2035 self
2036 }
2037
2038 pub fn set_images(mut self, value: bool) -> Self {
2040 self.noimage = !value;
2041 self
2042 }
2043
2044 pub fn set_rel<S>(mut self, value: Option<S>) -> Self where S: AsRef<str> {
2048 self.rel = value.map(|v| v.as_ref().to_owned());
2049 self
2050 }
2051
2052 pub fn set_align_class(mut self, value: bool) {
2059 self.align_class_enabled = Some(value);
2060 }
2061
2062 pub fn set_raw_blocks(mut self, value: bool) -> Self {
2068 self.raw_block_enabled = value;
2069 self
2070 }
2071
2072 pub fn set_sanitize(mut self, enable: bool) -> Self {
2085 if enable {
2086 self.adjust_sanitizer(|sanitizer| sanitizer)
2087 } else {
2088 self.sanitizer_config = None;
2089 self
2090 }
2091 }
2092
2093 pub fn adjust_sanitizer<F>(mut self, configurator: F) -> Self
2108 where for <'a, 'b> F: Fn(&'a mut crate::ammonia::Builder<'b>) -> &'a crate::ammonia::Builder<'b> + 'a
2109 {
2110 self.sanitizer_config = Some(Box::new(configurator));
2111 self
2112 }
2113
2114 pub fn set_uid(mut self, base_id: &str) -> Self {
2121 self.uid = format!("textileRef:{0}:", base_id);
2122 self.link_prefix = format!("{0}-", base_id);
2123 let dyn_3plus_replacement = format!(
2124 r#"$1<span class="caps">{0}:glyph:$2</span>$3"#,
2125 &self.uid);
2126 self.dyn_glyph_replacers = [
2127 (DYN_3PLUS_RE.clone(), dyn_3plus_replacement),
2129 ];
2130 self
2131 }
2132
2133 pub(crate) fn proper_br_tag(&self) -> &'static str {
2134 match self.html_type {
2135 HtmlKind::XHTML => "<br />",
2136 HtmlKind::HTML5 => "<br>",
2137 }
2138 }
2139}
2140
#[cfg(test)]
mod test {
    use super::{get_image_size, ParserState, Textile};

    /// A reachable remote image reports positive dimensions, a relative
    /// path reports none. Note: this test performs a network request.
    #[test]
    fn test_get_image_size() {
        let remote = get_image_size("https://en.wikipedia.org/favicon.ico");
        assert_ne!(remote, None);
        if let Some((width, height)) = remote {
            assert!(width > 0);
            assert!(height > 0);
        }

        let relative = get_image_size("../picture.jpg");
        assert_eq!(relative, None);
    }

    /// A footnote reference becomes a linked `<sup>` whose ids carry the
    /// link prefix of the parser.
    #[test]
    fn test_footnote_ref() {
        let textile = Textile::default();
        let mut state = ParserState::new(&textile);
        let actual = state.footnote_ref("foo[1]");
        let expected = format!(
            "foo<sup class=\"footnote\" id=\"fnrev{0}1\"><a href=\"#fn{0}1\">1</a></sup>",
            textile.link_prefix);
        assert_eq!(actual, expected);
    }
}