1use super::*;
6
7use indexmap::IndexMap;
8
9use pulldown_cmark::{Event, LinkType, Options, Parser, Tag, TagEnd};
10
11use crate::util::sub_chars;
12use crate::Span;
13use crate::{CheckableChunk, Range};
14
/// Describes which part of the commonmark source a piece of the plain text
/// belongs to.
///
/// The ranges stored here are produced by
/// `PlainOverlay::extract_plain_with_mapping`, which counts them in
/// characters rather than bytes.
#[derive(Debug, Clone)]
pub enum SourceRange {
    /// The plain text corresponds directly to this range of the source.
    Direct(Range),
    /// The source range is represented in the plain text by the given alias
    /// string instead of by its own content.
    Alias(Range, String),
}
23
24impl SourceRange {
25 #[allow(dead_code)]
28 pub(crate) fn apply_offset(&mut self, offset: usize) {
29 match self {
30 Self::Direct(range) => apply_offset(range, offset),
31 Self::Alias(range, _) => apply_offset(range, offset),
32 }
33 }
34
35 pub fn range(&self) -> Range {
39 match self {
40 Self::Direct(range) => range.clone(),
41 Self::Alias(range, _) => range.clone(),
42 }
43 }
44}
45
46impl std::ops::Deref for SourceRange {
47 type Target = Range;
48 fn deref(&self) -> &Self::Target {
49 match self {
50 Self::Direct(range) => range,
51 Self::Alias(range, _) => range,
52 }
53 }
54}
55
56pub(crate) fn is_html_tag_on_no_scope_list(text: &str) -> bool {
57 use regex::RegexSet;
58 lazy_static::lazy_static! {
59 static ref HTML_TAG_EMPTY_OR_SPECIAL_CASE: RegexSet = RegexSet::new([
60 r####"^<\s*[A-Za-z0-9]+(?:\s+.*)*\s*/>$"####, r####"^<\s*br\s*>$"####,
62 r####"^</?\s*(?:i|b|span|font|color|style)\s*/?>$"####,
63 r####"^<\s*pre\s*>.*</\s*pre\s*>\s?$"####,
64 ]).unwrap();
65 };
66 HTML_TAG_EMPTY_OR_SPECIAL_CASE.is_match(text)
67}
68
/// Exercise each pattern of `is_html_tag_on_no_scope_list` at least once.
#[test]
fn scoped() {
    // A plain opening or closing `<code>` tag is not on the list.
    assert!(!is_html_tag_on_no_scope_list("<code>"));
    assert!(!is_html_tag_on_no_scope_list("</code>"));

    // Self-closing tags are accepted regardless of their name.
    assert!(is_html_tag_on_no_scope_list("<code />"));
    // `<pre>` with its closing tag, followed by a single trailing newline.
    assert!(is_html_tag_on_no_scope_list("<pre>🌡</pre>\n"));
    // A bare line break.
    assert!(is_html_tag_on_no_scope_list("<br>"));
    // A closing tag from the fixed list of names.
    assert!(is_html_tag_on_no_scope_list("</span>"));
}
76
/// The plain text extracted from a chunk of commonmark, together with the
/// information needed to map positions in that plain text back to the chunk.
#[derive(Clone)]
pub struct PlainOverlay<'a> {
    /// The chunk the plain text was extracted from.
    raw: &'a CheckableChunk,
    /// The text that remains after the commonmark syntax was erased.
    plain: String,
    /// Maps ranges of `plain` to the ranges of the chunk content they stem
    /// from, in insertion order.
    mapping: IndexMap<Range, SourceRange>,
}
89
90impl<'a> PlainOverlay<'a> {
91 fn track(
94 s: &str,
95 cmark_range: SourceRange,
96 plain_acc: &mut String,
97 mapping: &mut IndexMap<Range, SourceRange>,
98 ) {
99 let cursor = plain_acc.chars().count();
105 let plain_range = match &cmark_range {
106 SourceRange::Alias(_range, alias) => {
107 if alias.is_empty() {
108 log::debug!("Alias for {s:?} was empty. Ignoring.");
109 return;
110 }
111 let alias16 = &alias[..std::cmp::min(alias.len(), 16)];
114 plain_acc.push_str(alias16);
115 Range {
116 start: cursor,
117 end: cursor + alias16.len(),
118 }
119 }
120 SourceRange::Direct(_range) => {
121 plain_acc.push_str(s);
122 Range {
123 start: cursor,
124 end: cursor + s.chars().count(),
125 }
126 }
127 };
128 let _ = mapping.insert(plain_range, cmark_range);
129 }
130
131 fn newlines(plain: &mut String, n: usize) {
133 for _ in 0..n {
134 plain.push('\n');
135 }
136 }
137
138 pub fn extract_plain_with_mapping(
140 cmark: &str,
141 ignores: &Ignores,
142 ) -> (String, IndexMap<Range, SourceRange>) {
143 let mut plain = String::with_capacity(cmark.len());
144 let mut mapping = indexmap::IndexMap::with_capacity(128);
145
146 let broken_link_handler = &mut |_broken: pulldown_cmark::BrokenLink| -> Option<(
147 pulldown_cmark::CowStr,
148 pulldown_cmark::CowStr,
149 )> {
150 Some((
151 pulldown_cmark::CowStr::Borrowed(""),
152 pulldown_cmark::CowStr::Borrowed(""),
153 ))
154 };
155 let parser = Parser::new_with_broken_link_callback(
156 cmark,
157 Options::all() ^ Options::ENABLE_SMART_PUNCTUATION,
158 Some(broken_link_handler),
159 );
160
161 let rust_fence =
162 pulldown_cmark::CodeBlockKind::Fenced(pulldown_cmark::CowStr::Borrowed("rust"));
163
164 let mut html_block = 0_usize;
165 let mut code_block = 0_usize;
166 let mut html_code_block = 0_usize;
167 let mut inception = false;
168 let mut skip_link_text = false;
169 let mut skip_table_text = false;
170
171 for (event, byte_range) in parser.into_offset_iter() {
172 if byte_range.start > byte_range.end {
173 log::warn!(
174 "Dropping event {event:?} due to negative byte range {byte_range:?}, see {}",
175 "https://github.com/raphlinus/pulldown-cmark/issues/478"
176 );
177 continue;
178 }
179
180 log::trace!("Parsing event (bytes: {byte_range:?}): {event:?}");
181
182 let cursor = cmark.char_indices().enumerate().peekable();
183 let mut char_cursor = 0usize;
184
185 for (char_idx, (byte_offset, _c)) in cursor {
187 char_cursor = char_idx;
188 if byte_offset >= byte_range.start {
189 break;
190 }
191 }
192 let char_range = {
195 let bytes_start = std::cmp::min(byte_range.start, cmark.len());
196 let bytes_end = std::cmp::min(byte_range.end, cmark.len());
197 assert!(bytes_start <= bytes_end);
198 let char_count = cmark[bytes_start..bytes_end].chars().count();
199 char_cursor..(char_cursor + char_count)
200 };
201
202 match event {
203 Event::InlineHtml(html) => {
204 if html.starts_with("<code") {
205 html_code_block += 1;
206 } else if html.ends_with("code>") {
207 html_code_block = html_code_block.saturating_sub(1);
208 }
209 }
210 Event::InlineMath(_s) => {
211 }
213 Event::DisplayMath(_s) => {
214 }
216 Event::Start(tag) => match tag {
217 Tag::Table(_alignments) => {
218 skip_table_text = true;
219 }
220 Tag::TableCell | Tag::TableHead | Tag::TableRow => {}
221 Tag::CodeBlock(fenced) => {
222 code_block += 1;
223 inception = fenced == rust_fence;
224 }
225 Tag::HtmlBlock => {
226 html_block += 1;
227 }
228 Tag::Link {
229 link_type,
230 dest_url: _,
231 title: _,
232 id: _,
233 } => {
234 skip_link_text = match link_type {
235 LinkType::ReferenceUnknown
236 | LinkType::Reference
237 | LinkType::Inline
238 | LinkType::Collapsed
239 | LinkType::CollapsedUnknown
240 | LinkType::Shortcut
241 | LinkType::ShortcutUnknown => false,
242 LinkType::Autolink | LinkType::Email => true,
243 };
244 }
245 Tag::List(_) => {
246 Self::newlines(&mut plain, 1);
248 }
249 Tag::Image {
250 link_type: _,
251 dest_url: _,
252 title,
253 id: _,
254 } => {
255 Self::track(
256 &title,
257 SourceRange::Direct(char_range),
258 &mut plain,
259 &mut mapping,
260 );
261 }
262 _ => {}
263 },
264 Event::End(tag) => {
265 match tag {
266 TagEnd::Table => {
267 skip_table_text = false;
268 Self::newlines(&mut plain, 1);
269 }
270 TagEnd::Link => {
271 }
273 TagEnd::Image => {}
274 TagEnd::Heading(_level) => {
275 Self::newlines(&mut plain, 2);
276 }
277 TagEnd::CodeBlock => {
278 code_block = code_block.saturating_sub(1);
279
280 }
284 TagEnd::HtmlBlock => {
285 html_block = html_block.saturating_sub(1);
286 }
287 TagEnd::Paragraph => Self::newlines(&mut plain, 2),
288
289 TagEnd::Item => {
290 Self::newlines(&mut plain, 1);
292 }
293 _ => {}
294 }
295 }
296 Event::Text(s) => {
297 if html_block > 0 || html_code_block > 0 {
298 } else if code_block > 0 {
299 if inception {
300 }
313 } else if skip_link_text {
314 skip_link_text = false
315 } else if !skip_table_text {
316 Self::track(
317 &s,
318 SourceRange::Direct(char_range),
319 &mut plain,
320 &mut mapping,
321 );
322 }
323 }
324 Event::Code(s) => {
325 let shortened_range = Range {
328 start: char_range.start.saturating_add(1),
329 end: char_range.end.saturating_sub(1),
330 };
331 let alias = cmark[byte_range]
332 .chars()
333 .skip(1)
334 .take(shortened_range.len())
335 .filter(|x| x.is_ascii_alphanumeric())
336 .collect::<String>();
337
338 if !shortened_range.is_empty() && !alias.is_empty() {
339 Self::track(
340 &s,
341 SourceRange::Alias(shortened_range, alias),
342 &mut plain,
343 &mut mapping,
344 );
345 }
346 }
347 Event::Html(tag) => {
348 if is_html_tag_on_no_scope_list(&tag) {
349 }
350 }
351 Event::FootnoteReference(s) => {
352 if !ignores.footnote_references && !s.is_empty() {
353 let char_range = Range {
354 start: char_range.start + 2,
355 end: char_range.end - 1,
356 };
357 Self::track(
358 &s,
359 SourceRange::Direct(char_range),
360 &mut plain,
361 &mut mapping,
362 );
363 }
364 }
365 Event::SoftBreak => {
366 Self::newlines(&mut plain, 1);
367 }
368 Event::HardBreak => {
369 Self::newlines(&mut plain, 2);
370 }
371 Event::Rule => {
372 Self::newlines(&mut plain, 1);
373 }
374 Event::TaskListMarker(_checked) => {}
375 }
376 }
377
378 let trailing_newlines = plain.chars().rev().take_while(|x| *x == '\n').count();
381 if trailing_newlines <= plain.len() {
382 plain.truncate(plain.len() - trailing_newlines)
383 }
384 if let Some((mut plain_range, raw_range)) = mapping.pop() {
385 if plain_range.end > plain.len() {
386 plain_range.end = plain.len();
387 }
388 if plain_range.start > plain_range.end {
389 let content = String::from_iter(
390 cmark
391 .char_indices()
392 .filter(|(idx, _c)| raw_range.contains(idx))
393 .map(|(_idx, c)| c),
394 );
395 panic!(
396 "failed: {} <= {}, raw range: {:?}\ncontent: >>{}<<",
397 plain_range.start, plain_range.end, raw_range, content
398 );
399 }
400 mapping.insert(plain_range, raw_range);
401 }
402 (plain, mapping)
403 }
404
405 pub fn erase_cmark(chunk: &'a CheckableChunk, ignores: &Ignores) -> Self {
410 let (plain, mapping) = Self::extract_plain_with_mapping(chunk.as_str(), ignores);
411 Self {
412 raw: chunk,
413 plain,
414 mapping,
415 }
416 }
417
418 pub fn find_spans(&self, condensed_range: Range) -> IndexMap<Range, Span> {
422 let mut active = false;
423 let Range { start, end } = condensed_range;
424 let n = self.mapping.len();
425 self.mapping
426 .iter()
427 .skip_while(|(sub, _raw)| sub.end <= start)
428 .take_while(|(sub, _raw)| sub.start < end)
429 .inspect(|x| {
430 log::trace!(">>> item {:?} ∈ {:?}", condensed_range, x.0);
431 })
432 .filter(|(sub, _)| {
433 !sub.is_empty()
435 })
436 .filter(|(_, raw)| {
437 matches!(raw, SourceRange::Direct(_))
439 })
440 .fold(
441 IndexMap::<Range, Span>::with_capacity(n),
442 |mut acc, (sub, raw)| {
443 fn recombine(range: Range, offset: usize, len: usize) -> Range {
444 Range {
445 start: range.start + offset,
446 end: range.start + offset + len,
447 }
448 }
449
450 let _ = if sub.contains(&start) {
451 let offset = start - sub.start;
454 let overlay_range = if sub.contains(&(end - 1)) {
455 active = false;
457 start..end
458 } else {
459 active = true;
461 start..sub.end
462 };
463 let raw = recombine(raw.range(), offset, overlay_range.len());
464 Some((overlay_range, raw))
465 } else {
475 None
476 }
477 .map(|(sub, raw)| {
478 log::trace!("convert: cmark-erased={sub:?} -> raw={raw:?}");
479
480 if raw.is_empty() {
481 log::warn!("linear range to spans: {raw:?} empty!");
482 } else {
483 let resolved = self.raw.find_spans(raw.clone());
484 log::trace!("cmark-erased range to spans: {raw:?} -> {resolved:?}");
485 acc.extend(resolved);
486 }
487 });
488 acc
489 },
490 )
491 }
492
493 pub fn as_str(&self) -> &str {
495 self.plain.as_str()
496 }
497}
498
499use std::fmt;
500
501impl<'a> fmt::Display for PlainOverlay<'a> {
502 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
503 formatter.write_str(self.plain.as_str())
504 }
505}
506
507impl<'a> fmt::Debug for PlainOverlay<'a> {
508 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
509 use console::Style;
510
511 let styles = [
512 Style::new().italic().bold().dim().red(),
513 Style::new().italic().bold().dim().green(),
514 Style::new().italic().bold().dim().yellow(),
515 Style::new().italic().bold().dim().magenta(),
516 Style::new().italic().bold().dim().cyan(),
517 ];
518
519 let uncovered = Style::new().bold().on_black().dim().white();
520
521 let color_cycle = styles.iter().cycle();
522
523 let commonmark = self.raw.as_str().to_owned();
524
525 let mut coloured_plain = String::with_capacity(1024);
526 let mut coloured_md = String::with_capacity(1024);
527
528 let mut previous_md_end = 0usize;
529 for (plain_range, md_range, style) in
530 itertools::cons_tuples(self.mapping.iter().zip(color_cycle))
531 {
532 let delta = md_range.start.saturating_sub(previous_md_end);
534 if delta > 0 {
536 let s = sub_chars(commonmark.as_str(), previous_md_end..md_range.start);
537 coloured_md.push_str(uncovered.apply_to(s.as_str()).to_string().as_str());
538 }
539 previous_md_end = md_range.end;
540
541 let s = sub_chars(commonmark.as_str(), md_range.range());
542 coloured_md.push_str(style.apply_to(s.as_str()).to_string().as_str());
543
544 let s = sub_chars(self.plain.as_str(), plain_range.clone());
545 coloured_plain.push_str(style.apply_to(s.as_str()).to_string().as_str());
546 }
547 writeln!(formatter, "Commonmark:\n{coloured_md}")?;
550 writeln!(formatter, "Plain:\n{coloured_plain}")?;
551 Ok(())
552 }
553}
554
/// Selects which commonmark constructs are left out of the plain text.
#[derive(Clone, Default)]
pub struct Ignores {
    /// When set, footnote references do not contribute to the plain text.
    pub footnote_references: bool,
}