doc_chunks/
chunk.rs

1//! Chunk definition for what is going to be processed by the checkers
2//!
3//! A chunk consists of multiple consecutive literals joined by newlines.
4
5use super::*;
6
7use indexmap::IndexMap;
8use std::convert::TryFrom;
9use std::fmt;
10use std::path::Path;
11
12use crate::{
13    util::{sub_char_range, sub_chars},
14    Range, Span,
15};
16use crate::{Ignores, PlainOverlay};
17
18/// Definition of the source of a checkable chunk
19#[derive(Debug, Clone, Hash, Eq, PartialEq)]
20pub enum ContentOrigin {
21    /// A `Cargo.toml` manifest that contains a `description` field.
22    CargoManifestDescription(PathBuf),
23    /// A common mark file at given path.
24    CommonMarkFile(PathBuf),
25    /// A rustdoc comment, part of file reference by path in span.
26    RustDocTest(PathBuf, Span),
27    /// Full rust source file.
28    RustSourceFile(PathBuf),
29    /// A test entity for a rust file, with no meaning outside of test.
30    TestEntityRust,
31    /// A test entity for a cmark file, with no meaning outside of test.
32    TestEntityCommonMark,
33}
34
35impl ContentOrigin {
36    /// Represent the content origin as [path](std::path::PathBuf).
37    ///
38    /// For unit and integration tests, two additional hardcoded variants are
39    /// available, which resolve to static paths: `TestEntityRust` variant
40    /// becomes `/tmp/test/entity.rs`, `TestEntityCommonMark` variant becomes
41    /// `/tmp/test/entity.md`.
42    pub fn as_path(&self) -> &Path {
43        match self {
44            Self::CargoManifestDescription(path) => path.as_path(),
45            Self::CommonMarkFile(path) => path.as_path(),
46            Self::RustDocTest(path, _) => path.as_path(),
47            Self::RustSourceFile(path) => path.as_path(),
48            Self::TestEntityCommonMark => {
49                lazy_static::lazy_static! {
50                    static ref TEST_ENTITY_CMARK: PathBuf = PathBuf::from("/tmp/test/entity.md");
51                };
52                TEST_ENTITY_CMARK.as_path()
53            }
54            Self::TestEntityRust => {
55                lazy_static::lazy_static! {
56                    static ref TEST_ENTITY_RUST: PathBuf = PathBuf::from("/tmp/test/entity.rs");
57                };
58                TEST_ENTITY_RUST.as_path()
59            }
60        }
61    }
62}
63
64impl fmt::Display for ContentOrigin {
65    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
66        write!(formatter, "{}", self.as_path().display())
67    }
68}
69
70/// A chunk of documentation that is supposed to be checked.
71#[derive(Debug, Clone, Eq, PartialEq)]
72pub struct CheckableChunk {
73    /// Rendered contents of a literal set or just content of a markdown file,
74    /// e.g. a comment of two lines is represented as ' First Line\n second
75    /// line' in `rendered` where the whitespaces are preserved.
76    content: String,
77    /// Mapping from range within `content` and `Span` referencing the location
78    /// within the source file. For a markdown file i.e. this would become a
79    /// single entry spanning from start to end.
80    source_mapping: IndexMap<Range, Span>,
81    /// Track what kind of comment the chunk is.
82    variant: CommentVariant,
83}
84
85impl std::hash::Hash for CheckableChunk {
86    fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) {
87        self.content.hash(hasher);
88        // order is consistent
89        self.source_mapping.iter().for_each(|t| {
90            t.hash(hasher);
91        });
92        self.variant.hash(hasher);
93    }
94}
95
96impl CheckableChunk {
97    /// Specific to rust source code, either as part of doc test comments or
98    /// file scope.
99    pub fn from_literalset(set: LiteralSet) -> Self {
100        set.into_chunk()
101    }
102
103    /// Load content from string, may contain common mark content.
104    pub fn from_str(
105        content: &str,
106        source_mapping: IndexMap<Range, Span>,
107        variant: CommentVariant,
108    ) -> Self {
109        Self::from_string(content.to_string(), source_mapping, variant)
110    }
111
112    /// Load content from string, may contain common mark content.
113    pub fn from_string(
114        content: String,
115        source_mapping: IndexMap<Range, Span>,
116        variant: CommentVariant,
117    ) -> Self {
118        Self {
119            content,
120            source_mapping,
121            variant,
122        }
123    }
124
125    /// Find which part of the range maps to which span. Note that Range can
126    /// very well be split into multiple fragments where each of them can be
127    /// mapped to a potentially non-continuous span.
128    ///
129    /// Example:
130    ///
131    /// ```text,ignore
132    /// 0..40 -> [
133    ///           (0,10) => (1,0)->(3,5),
134    ///           (10,12) => (3,6)->(3,7),
135    ///           (13,17) => (4,0)->(4,3),
136    /// ]
137    /// ```
138    pub fn find_spans(&self, range: Range) -> IndexMap<Range, Span> {
139        log::trace!(target: "find_spans", "Chunk find_span {range:?}");
140
141        let Range { start, end } = range;
142        self.source_mapping
143            .iter()
144            .skip_while(|(fragment_range, _span)| fragment_range.end <= start)
145            .take_while(|(fragment_range, _span)| fragment_range.start < end)
146            .inspect(|x| {
147                log::trace!(target: "find_spans", ">>> item {:?} ∈ {:?}", range, x.0);
148            })
149            .filter(|(fragment_range, _)| {
150                // could possibly happen on empty documentation lines with `///`
151                !fragment_range.is_empty()
152            })
153            .map(|(fragment_range, fragment_span)| {
154                // trim range so we only capture the relevant part
155                let sub_fragment_range = std::cmp::max(fragment_range.start, range.start)
156                    ..std::cmp::min(fragment_range.end, range.end);
157                (fragment_span, fragment_range, sub_fragment_range)
158            })
159            .inspect(|(fragment_span, fragment_range, sub_fragment_range)| {
160                let (fragment_span, fragment_range, sub_fragment_range) =
161                    (fragment_span, fragment_range, sub_fragment_range.clone());
162                log::trace!(target: "find_spans",
163                    ">> fragment: span: {fragment_span:?} => range: {fragment_range:?} | sub: {range:?} -> sub_fragment: {sub_fragment_range:?}",
164                );
165
166                log::trace!(target: "find_spans",
167                    "[f]display;\n>{}<",
168                    ChunkDisplay::from((self, *fragment_range))
169                );
170                log::trace!(target: "find_spans",
171                    "[f]content;\n>{}<",
172                    sub_chars(self.as_str(), (*fragment_range).clone())
173                );
174            })
175            .filter_map(|(fragment_span, fragment_range, sub_fragment_range)| {
176                if sub_fragment_range.is_empty() {
177                    log::trace!(target: "find_spans","sub fragment is zero, dropping!");
178                    return None;
179                }
180                if let Some(span_len) = fragment_span.one_line_len() {
181                    debug_assert_eq!(span_len, fragment_range.len());
182                }
183                Some((fragment_span, fragment_range, sub_fragment_range))
184            })
185            .filter_map(|(fragment_span, fragment_range, sub_fragment_range)| {
186                // take the full fragment string, we need to count newlines before and after
187                let s = sub_char_range(self.as_str(), fragment_range.clone());
188
189                // relative to the range given / offset
190                let shift = sub_fragment_range.start - fragment_range.start;
191                // state
192                let mut sub_fragment_span = *fragment_span;
193                let mut cursor: LineColumn = fragment_span.start;
194                let mut iter = s.chars().enumerate().peekable();
195                let mut started = true;
196                'w: while let Some((idx, c)) = iter.next() {
197                    if idx == shift {
198                        sub_fragment_span.start = cursor;
199                        started = true;
200                    }
201                    if idx >= (sub_fragment_range.len() + shift - 1) {
202                        sub_fragment_span.end = cursor;
203                        break 'w;
204                    }
205                    if iter.peek().is_none() && started {
206                        sub_fragment_span.end = cursor;
207                    }
208                    // FIXME what about \n\r or \r\n or \r ?
209                    match c {
210                        '\n' => {
211                            cursor.line += 1;
212                            cursor.column = 0;
213                        }
214                        _ => cursor.column += 1,
215                    }
216                }
217
218                if let Some(sub_fragment_span_len) = sub_fragment_span.one_line_len() {
219                    debug_assert_eq!(sub_fragment_span_len, sub_fragment_range.len());
220                }
221                log::trace!(
222                    ">> sub_fragment range={sub_fragment_range:?} span={sub_fragment_span:?} => {}",
223                    self.display(sub_fragment_range.clone()),
224                );
225
226                Some((sub_fragment_range, sub_fragment_span))
227            })
228            .collect::<IndexMap<_, _>>()
229    }
230
231    /// Extract all spans which at least partially overlap with range, i.e.
232    /// report all spans that either
233    ///  - contain `range.start`
234    ///  - contain `range.end`
235    ///  - are totally enclosed in `range`
236    ///
237    /// Example:
238    ///
239    /// Below setup results in `[s2, s3, s4]`
240    ///
241    /// ```text,ignore
242    /// |-- s1 --|-- s2 --|-- s3 --|-- s4 --|
243    ///             |----- range -----|
244    /// ```
245    ///
246    /// Attention:
247    ///
248    /// For large `#[doc="long multiline text"]` comments, the covered span
249    /// might be large (i.e. just one single) which leads to a surprising result
250    /// of just one span for a relatively small input `range`.
251    ///
252    /// Below setup results in `[s0]`
253    ///
254    /// ```text,ignore
255    /// |---...--- s0 ----------------------...---|
256    ///             |--- range ---|
257    /// ```
258    ///
259    pub fn find_covered_spans(&self, range: Range) -> impl Iterator<Item = &'_ Span> {
260        let Range { start, end } = range;
261        self.source_mapping
262            .iter()
263            .skip_while(move |(fragment_range, _)| fragment_range.end <= start)
264            .take_while(move |(fragment_range, _)| fragment_range.start <= end)
265            .filter_map(|(fragment_range, fragment_span)| {
266                // could possibly happen on empty documentation lines with `///`
267                // TODO: is_empty() throws disambiguity error
268                if fragment_range.is_empty() {
269                    None
270                } else {
271                    Some(fragment_span)
272                }
273            })
274    }
275
276    /// Yields a set of ranges covering all spanned lines (the full line).
277    pub fn find_covered_lines(&self, range: Range) -> Vec<Range> {
278        // assumes the _mistake_ is within one line
279        // if not we chop it down to the first line
280        let mut acc = Vec::with_capacity(32);
281        let mut iter = self.as_str().chars().enumerate();
282
283        let mut last_newline_idx = 0usize;
284        // simulate the previous newline was at virtual `-1`
285        let mut state_idx = 0usize;
286        let mut state_c = '\n';
287        loop {
288            if let Some((idx, c)) = iter.next() {
289                if c == '\n' {
290                    if range.start <= idx {
291                        // do not include the newline
292                        acc.push(last_newline_idx..idx);
293                    }
294                    last_newline_idx = idx + 1;
295                    if last_newline_idx >= range.end {
296                        break;
297                    }
298                }
299                state_c = c;
300                state_idx = idx;
301            } else {
302                // if the previous character was a new line,
303                // such that the common mark chunk ended with
304                // a newline, we do not want to append another empty line
305                // for no reason, we include empty lines for `\n\n` though
306                if state_c != '\n' {
307                    // we want to include the last character
308                    acc.push(last_newline_idx..(state_idx + 1));
309                }
310                break;
311            };
312        }
313        acc
314    }
315
316    /// Extract the overall length of all covered lines as they appear in the
317    /// origin.
318    pub fn extract_line_lengths(&self) -> Result<Vec<usize>> {
319        let line_ranges = self.find_covered_lines(0..self.len_in_chars());
320        let lengths = line_ranges
321            .iter()
322            .try_fold(Vec::new(), |mut acc, line_range| {
323                let spans = self.find_spans(line_range.clone());
324                if let Some(span) = spans.get(line_range) {
325                    acc.push(span.start.column + line_range.len());
326                    Ok(acc)
327                } else if let Some(span) = self.source_mapping.get(line_range) {
328                    // if the span was not found, it should still be in the whole source mapping
329                    acc.push(span.start.column + line_range.len());
330                    Ok(acc)
331                } else {
332                    Err(Error::InvalidLineRange {
333                        line_range: line_range.clone(),
334                        source_mapping: self.source_mapping.clone(),
335                    })
336                }
337            })?;
338
339        Ok(lengths)
340    }
341
342    /// Obtain the content as `str` representation.
343    pub fn as_str(&self) -> &str {
344        self.content.as_str()
345    }
346
347    /// Get the display wrapper type to be used with i.e. `format!(..)`.
348    pub fn display(&self, range: Range) -> ChunkDisplay {
349        ChunkDisplay::from((self, range))
350    }
351
352    /// Iterate over all ranges and the associated span.
353    pub fn iter(&self) -> indexmap::map::Iter<Range, Span> {
354        self.source_mapping.iter()
355    }
356
357    /// Number of fragments.
358    ///
359    /// A fragment is a continuous sub-string which is not split up any further.
360    pub fn fragment_count(&self) -> usize {
361        self.source_mapping.len()
362    }
363
364    /// Obtain an accessor object containing mapping and string representation,
365    /// removing the markdown annotations.
366    pub fn erase_cmark(&self, ignores: &Ignores) -> PlainOverlay {
367        PlainOverlay::erase_cmark(self, ignores)
368    }
369
370    /// Obtain the length in characters.
371    pub fn len_in_chars(&self) -> usize {
372        self.content.chars().count()
373    }
374
375    /// The variant type of comment.
376    pub fn variant(&self) -> CommentVariant {
377        self.variant.clone()
378    }
379}
380
381/// Convert the clusters of one file into a source description as well as well
382/// as vector of checkable chunks.
383impl From<Clusters> for Vec<CheckableChunk> {
384    fn from(clusters: Clusters) -> Vec<CheckableChunk> {
385        clusters
386            .set
387            .into_iter()
388            .map(CheckableChunk::from_literalset)
389            .collect::<Vec<_>>()
390    }
391}
392
393/// A display style wrapper for a trimmed literal.
394///
395/// Allows better display of coverage results without code duplication.
396///
397/// Consists of literal reference and a relative range to the start of the
398/// literal.
399#[derive(Debug, Clone)]
400pub struct ChunkDisplay<'a>(pub &'a CheckableChunk, pub Range);
401
402impl<'a, C> From<(C, &Range)> for ChunkDisplay<'a>
403where
404    C: Into<&'a CheckableChunk>,
405{
406    fn from(tuple: (C, &Range)) -> Self {
407        let tuple0 = tuple.0.into();
408        Self(tuple0, tuple.1.clone())
409    }
410}
411
412impl<'a, C> From<(C, Range)> for ChunkDisplay<'a>
413where
414    C: Into<&'a CheckableChunk>,
415{
416    fn from(tuple: (C, Range)) -> Self {
417        let tuple0 = tuple.0.into();
418        Self(tuple0, tuple.1)
419    }
420}
421
422impl<'a, R> TryFrom<(R, Span)> for ChunkDisplay<'a>
423where
424    R: Into<&'a CheckableChunk>,
425{
426    type Error = Error;
427    fn try_from(tuple: (R, Span)) -> Result<Self> {
428        let chunk = tuple.0.into();
429        let span = tuple.1;
430        let range = span.to_content_range(chunk)?;
431        Ok(Self(chunk, range))
432    }
433}
434
435impl<'a> From<ChunkDisplay<'a>> for (&'a CheckableChunk, Range) {
436    fn from(val: ChunkDisplay<'a>) -> Self {
437        (val.0, val.1)
438    }
439}
440
441impl<'a> fmt::Display for ChunkDisplay<'a> {
442    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
443        use console::Style;
444
445        // the contextual characters not covered by range `self.1`
446        let context = Style::new().on_black().bold().cyan();
447        // highlight the mistake
448        let highlight = Style::new().on_black().bold().underlined().red().italic();
449        // a special style for any errors, to visualize out of bounds access
450        let oob = Style::new().blink().bold().on_yellow().red();
451
452        // simplify
453        let literal = self.0;
454        let start = self.1.start;
455        let end = self.1.end;
456
457        assert!(start <= end);
458
459        // content without quote characters
460        let data = literal.as_str();
461
462        // colour the preceding quote character
463        // and the context preceding the highlight
464        let s = sub_chars(data, 0..start);
465        let ctx1 = if start < literal.len_in_chars() {
466            context.apply_to(s.as_str())
467        } else {
468            oob.apply_to("!!!")
469        };
470
471        // highlight the given range
472        let s = sub_chars(data, start..end);
473        let highlight = if end > literal.len_in_chars() {
474            oob.apply_to(s.as_str())
475        } else {
476            highlight.apply_to(s.as_str())
477        };
478
479        // color trailing context if any as well as the closing quote character
480        let s = sub_chars(data, end..literal.len_in_chars());
481        let ctx2 = if end <= literal.len_in_chars() {
482            context.apply_to(s.as_str())
483        } else {
484            oob.apply_to("!!!")
485        };
486
487        write!(formatter, "{ctx1}{highlight}{ctx2}")
488    }
489}