1use super::*;
6
7use indexmap::IndexMap;
8
9use pulldown_cmark::{Event, LinkType, Options, Parser, Tag, TagEnd};
10
11use crate::util::sub_chars;
12use crate::Span;
13use crate::{CheckableChunk, Range};
14
/// Describes where a piece of the plain text originates from in the source
/// text.
///
/// Both variants carry the range in the source; the enum dereferences to
/// that range.
#[derive(Debug, Clone)]
pub enum SourceRange {
    /// The plain text was taken over as-is from the given source range.
    Direct(Range),
    /// The given source range is represented in the plain text by the
    /// attached alias string rather than by its own content.
    Alias(Range, String),
}
23
24impl SourceRange {
25 #[allow(dead_code)]
28 pub(crate) fn apply_offset(&mut self, offset: usize) {
29 match self {
30 Self::Direct(range) => apply_offset(range, offset),
31 Self::Alias(range, _) => apply_offset(range, offset),
32 }
33 }
34
35 pub fn range(&self) -> Range {
39 match self {
40 Self::Direct(range) => range.clone(),
41 Self::Alias(range, _) => range.clone(),
42 }
43 }
44}
45
46impl std::ops::Deref for SourceRange {
47 type Target = Range;
48 fn deref(&self) -> &Self::Target {
49 match self {
50 Self::Direct(range) => range,
51 Self::Alias(range, _) => range,
52 }
53 }
54}
55
56pub(crate) fn is_html_tag_on_no_scope_list(text: &str) -> bool {
57 use regex::RegexSet;
58 lazy_static::lazy_static! {
59 static ref HTML_TAG_EMPTY_OR_SPECIAL_CASE: RegexSet = RegexSet::new([
60 r####"^<\s*[A-Za-z0-9]+(?:\s+.*)*\s*/>$"####, r####"^<\s*br\s*>$"####,
62 r####"^</?\s*(?:i|b|span|font|color|style)\s*/?>$"####,
63 r####"^<\s*pre\s*>.*</\s*pre\s*>\s?$"####,
64 ]).unwrap();
65 };
66 HTML_TAG_EMPTY_OR_SPECIAL_CASE.is_match(text)
67}
68
/// Opening and closing `code` tags open a scope, while self-closing tags and
/// single line `pre` elements are on the no-scope list.
#[test]
fn scoped() {
    let cases = [
        ("<code>", false),
        ("</code>", false),
        ("<code />", true),
        ("<pre>🌡</pre>\n", true),
    ];
    for (html, expected) in cases {
        assert_eq!(expected, is_html_tag_on_no_scope_list(html));
    }
}
76
/// A plain text view of a commonmark chunk, together with the bookkeeping
/// needed to map positions in the plain text back to the chunk.
#[derive(Clone)]
pub struct PlainOverlay<'a> {
    /// The chunk the plain text was extracted from.
    raw: &'a CheckableChunk,
    /// The extracted plain text.
    plain: String,
    /// Maps character ranges of `plain` (keys) to the ranges of the chunk
    /// content they originate from (values), in insertion order.
    mapping: IndexMap<Range, SourceRange>,
}
89
90impl<'a> PlainOverlay<'a> {
91 fn track(
94 s: &str,
95 cmark_range: SourceRange,
96 plain_acc: &mut String,
97 mapping: &mut IndexMap<Range, SourceRange>,
98 ) {
99 let cursor = plain_acc.chars().count();
105 let plain_range = match &cmark_range {
106 SourceRange::Alias(_range, alias) => {
107 if alias.is_empty() {
108 log::debug!("Alias for {s:?} was empty. Ignoring.");
109 return;
110 }
111 let alias16 = &alias[..std::cmp::min(alias.len(), 16)];
114 plain_acc.push_str(alias16);
115 Range {
116 start: cursor,
117 end: cursor + alias16.len(),
118 }
119 }
120 SourceRange::Direct(_range) => {
121 plain_acc.push_str(s);
122 Range {
123 start: cursor,
124 end: cursor + s.chars().count(),
125 }
126 }
127 };
128 let _ = mapping.insert(plain_range, cmark_range);
129 }
130
131 fn newlines(plain: &mut String, n: usize) {
133 for _ in 0..n {
134 plain.push('\n');
135 }
136 }
137
138 pub fn extract_plain_with_mapping(
140 cmark: &str,
141 ignores: &Ignores,
142 ) -> (String, IndexMap<Range, SourceRange>) {
143 let mut plain = String::with_capacity(cmark.len());
144 let mut mapping = indexmap::IndexMap::with_capacity(128);
145
146 let broken_link_handler = &mut |_broken: pulldown_cmark::BrokenLink| -> Option<(
147 pulldown_cmark::CowStr,
148 pulldown_cmark::CowStr,
149 )> {
150 Some((
151 pulldown_cmark::CowStr::Borrowed(""),
152 pulldown_cmark::CowStr::Borrowed(""),
153 ))
154 };
155 let parser = Parser::new_with_broken_link_callback(
156 cmark,
157 Options::all() ^ Options::ENABLE_SMART_PUNCTUATION,
158 Some(broken_link_handler),
159 );
160
161 let rust_fence =
162 pulldown_cmark::CodeBlockKind::Fenced(pulldown_cmark::CowStr::Borrowed("rust"));
163
164 let mut html_block = 0_usize;
165 let mut code_block = 0_usize;
166 let mut html_code_block = 0_usize;
167 let mut inception = false;
168 let mut skip_link_text = false;
169 let mut skip_table_text = false;
170
171 for (event, byte_range) in parser.into_offset_iter() {
172 if byte_range.start > byte_range.end {
173 log::warn!(
174 "Dropping event {event:?} due to negative byte range {byte_range:?}, see {}",
175 "https://github.com/raphlinus/pulldown-cmark/issues/478"
176 );
177 continue;
178 }
179
180 log::trace!("Parsing event (bytes: {byte_range:?}): {event:?}");
181
182 let cursor = cmark.char_indices().enumerate().peekable();
183 let mut char_cursor = 0usize;
184
185 for (char_idx, (byte_offset, _c)) in cursor {
187 char_cursor = char_idx;
188 if byte_offset >= byte_range.start {
189 break;
190 }
191 }
192 let char_range = {
195 let bytes_start = std::cmp::min(byte_range.start, cmark.len());
196 let bytes_end = std::cmp::min(byte_range.end, cmark.len());
197 assert!(bytes_start <= bytes_end);
198 let char_count = cmark[bytes_start..bytes_end].chars().count();
199 char_cursor..(char_cursor + char_count)
200 };
201
202 match event {
203 Event::InlineHtml(html) => {
204 if html.starts_with("<code") {
205 html_code_block += 1;
206 } else if html.ends_with("code>") {
207 html_code_block = html_code_block.saturating_sub(1);
208 }
209 }
210 Event::InlineMath(_s) => {
211 }
213 Event::DisplayMath(_s) => {
214 }
216 Event::Start(tag) => match tag {
217 Tag::Table(_alignments) => {
218 skip_table_text = true;
219 }
220 Tag::TableCell | Tag::TableHead | Tag::TableRow => {}
221 Tag::CodeBlock(fenced) => {
222 code_block += 1;
223 inception = fenced == rust_fence;
224 }
225 Tag::Link {
226 link_type,
227 dest_url: _,
228 title: _,
229 id: _,
230 } => {
231 skip_link_text = match link_type {
232 LinkType::ReferenceUnknown
233 | LinkType::Reference
234 | LinkType::Inline
235 | LinkType::Collapsed
236 | LinkType::CollapsedUnknown
237 | LinkType::Shortcut
238 | LinkType::ShortcutUnknown => false,
239 LinkType::Autolink | LinkType::Email => true,
240 };
241 }
242 Tag::List(_) => {
243 Self::newlines(&mut plain, 1);
245 }
246 Tag::Image {
247 link_type: _,
248 dest_url: _,
249 title,
250 id: _,
251 } => {
252 Self::track(
253 &title,
254 SourceRange::Direct(char_range),
255 &mut plain,
256 &mut mapping,
257 );
258 }
259 _ => {}
260 },
261 Event::End(tag) => {
262 match tag {
263 TagEnd::Table { .. } => {
264 skip_table_text = false;
265 Self::newlines(&mut plain, 1);
266 }
267 TagEnd::Link => {
268 }
270 TagEnd::Image => {}
271 TagEnd::Heading(_level) => {
272 Self::newlines(&mut plain, 2);
273 }
274 TagEnd::CodeBlock => {
275 code_block = code_block.saturating_sub(1);
276
277 }
281 TagEnd::Paragraph => Self::newlines(&mut plain, 2),
282
283 TagEnd::Item => {
284 Self::newlines(&mut plain, 1);
286 }
287 _ => {}
288 }
289 }
290 Event::Text(s) => {
291 if html_block > 0 {
292 } else if html_code_block > 0 {
293 } else if code_block > 0 {
294 if inception {
295 }
308 } else if skip_link_text {
309 skip_link_text = false
310 } else if !skip_table_text {
311 Self::track(
312 &s,
313 SourceRange::Direct(char_range),
314 &mut plain,
315 &mut mapping,
316 );
317 }
318 }
319 Event::Code(s) => {
320 let shortened_range = Range {
323 start: char_range.start.saturating_add(1),
324 end: char_range.end.saturating_sub(1),
325 };
326 let alias = cmark[byte_range]
327 .chars()
328 .skip(1)
329 .take(shortened_range.len())
330 .filter(|x| x.is_ascii_alphanumeric())
331 .collect::<String>();
332
333 if !shortened_range.is_empty() && !alias.is_empty() {
334 Self::track(
335 &s,
336 SourceRange::Alias(shortened_range, alias),
337 &mut plain,
338 &mut mapping,
339 );
340 }
341 }
342 Event::Html(tag) => {
343 if is_html_tag_on_no_scope_list(&tag) {
344 } else if tag.ends_with("/>") {
345 html_block = html_block.saturating_sub(1);
346 } else {
347 html_block += 1;
348 }
349 }
350 Event::FootnoteReference(s) => {
351 if !ignores.footnote_references && !s.is_empty() {
352 let char_range = Range {
353 start: char_range.start + 2,
354 end: char_range.end - 1,
355 };
356 Self::track(
357 &s,
358 SourceRange::Direct(char_range),
359 &mut plain,
360 &mut mapping,
361 );
362 }
363 }
364 Event::SoftBreak => {
365 Self::newlines(&mut plain, 1);
366 }
367 Event::HardBreak => {
368 Self::newlines(&mut plain, 2);
369 }
370 Event::Rule => {
371 Self::newlines(&mut plain, 1);
372 }
373 Event::TaskListMarker(_checked) => {}
374 }
375 }
376
377 let trailing_newlines = plain.chars().rev().take_while(|x| *x == '\n').count();
380 if trailing_newlines <= plain.len() {
381 plain.truncate(plain.len() - trailing_newlines)
382 }
383 if let Some((mut plain_range, raw_range)) = mapping.pop() {
384 if plain_range.end > plain.len() {
385 plain_range.end = plain.len();
386 }
387 if plain_range.start > plain_range.end {
388 let content = String::from_iter(
389 cmark
390 .char_indices()
391 .filter(|(idx, _c)| raw_range.contains(idx))
392 .map(|(_idx, c)| c),
393 );
394 panic!(
395 "failed: {} <= {}, raw range: {:?}\ncontent: >>{}<<",
396 plain_range.start, plain_range.end, raw_range, content
397 );
398 }
399 mapping.insert(plain_range, raw_range);
400 }
401 (plain, mapping)
402 }
403
404 pub fn erase_cmark(chunk: &'a CheckableChunk, ignores: &Ignores) -> Self {
409 let (plain, mapping) = Self::extract_plain_with_mapping(chunk.as_str(), ignores);
410 Self {
411 raw: chunk,
412 plain,
413 mapping,
414 }
415 }
416
417 pub fn find_spans(&self, condensed_range: Range) -> IndexMap<Range, Span> {
421 let mut active = false;
422 let Range { start, end } = condensed_range;
423 let n = self.mapping.len();
424 self.mapping
425 .iter()
426 .skip_while(|(sub, _raw)| sub.end <= start)
427 .take_while(|(sub, _raw)| sub.start < end)
428 .inspect(|x| {
429 log::trace!(">>> item {:?} ∈ {:?}", condensed_range, x.0);
430 })
431 .filter(|(sub, _)| {
432 !sub.is_empty()
434 })
435 .filter(|(_, raw)| {
436 if let SourceRange::Direct(_) = raw {
438 true
439 } else {
440 false
441 }
442 })
443 .fold(
444 IndexMap::<Range, Span>::with_capacity(n),
445 |mut acc, (sub, raw)| {
446 fn recombine(range: Range, offset: usize, len: usize) -> Range {
447 Range {
448 start: range.start + offset,
449 end: range.start + offset + len,
450 }
451 }
452
453 let _ = if sub.contains(&start) {
454 let offset = start - sub.start;
457 let overlay_range = if sub.contains(&(end - 1)) {
458 active = false;
460 start..end
461 } else {
462 active = true;
464 start..sub.end
465 };
466 let raw = recombine(raw.range(), offset, overlay_range.len());
467 Some((overlay_range, raw))
468 } else {
478 None
479 }
480 .map(|(sub, raw)| {
481 log::trace!("convert: cmark-erased={sub:?} -> raw={raw:?}");
482
483 if raw.is_empty() {
484 log::warn!("linear range to spans: {raw:?} empty!");
485 } else {
486 let resolved = self.raw.find_spans(raw.clone());
487 log::trace!("cmark-erased range to spans: {raw:?} -> {resolved:?}");
488 acc.extend(resolved);
489 }
490 });
491 acc
492 },
493 )
494 }
495
496 pub fn as_str(&self) -> &str {
498 self.plain.as_str()
499 }
500}
501
502use std::fmt;
503
504impl<'a> fmt::Display for PlainOverlay<'a> {
505 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
506 formatter.write_str(self.plain.as_str())
507 }
508}
509
510impl<'a> fmt::Debug for PlainOverlay<'a> {
511 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
512 use console::Style;
513
514 let styles = [
515 Style::new().italic().bold().dim().red(),
516 Style::new().italic().bold().dim().green(),
517 Style::new().italic().bold().dim().yellow(),
518 Style::new().italic().bold().dim().magenta(),
519 Style::new().italic().bold().dim().cyan(),
520 ];
521
522 let uncovered = Style::new().bold().on_black().dim().white();
523
524 let color_cycle = styles.iter().cycle();
525
526 let commonmark = self.raw.as_str().to_owned();
527
528 let mut coloured_plain = String::with_capacity(1024);
529 let mut coloured_md = String::with_capacity(1024);
530
531 let mut previous_md_end = 0usize;
532 for (plain_range, md_range, style) in
533 itertools::cons_tuples(self.mapping.iter().zip(color_cycle))
534 {
535 let delta = md_range.start.saturating_sub(previous_md_end);
537 if delta > 0 {
539 let s = sub_chars(commonmark.as_str(), previous_md_end..md_range.start);
540 coloured_md.push_str(uncovered.apply_to(s.as_str()).to_string().as_str());
541 }
542 previous_md_end = md_range.end;
543
544 let s = sub_chars(commonmark.as_str(), md_range.range());
545 coloured_md.push_str(style.apply_to(s.as_str()).to_string().as_str());
546
547 let s = sub_chars(self.plain.as_str(), plain_range.clone());
548 coloured_plain.push_str(style.apply_to(s.as_str()).to_string().as_str());
549 }
550 writeln!(formatter, "Commonmark:\n{coloured_md}")?;
553 writeln!(formatter, "Plain:\n{coloured_plain}")?;
554 Ok(())
555 }
556}
557
/// Selects which kinds of content are left out of the extracted plain text.
#[derive(Clone, Default)]
pub struct Ignores {
    /// If `true`, footnote references are not added to the plain text.
    pub footnote_references: bool,
}