lychee_lib/types/uri/
raw.rs

1use std::{fmt::Display, num::NonZeroUsize};
2
3/// A raw URI that got extracted from a document with a fuzzy parser.
4/// Note that this can still be invalid according to stricter URI standards
5#[derive(Clone, Debug, PartialEq, Eq, Hash)]
6pub struct RawUri {
7    /// Unparsed URI represented as a `String`. There is no guarantee that it
8    /// can be parsed into a URI object
9    pub text: String,
10    /// Name of the element that contained the URI (e.g. `a` for the <a> tag).
11    /// This is a way to classify links to make it easier to offer fine control
12    /// over the links that will be checked e.g. by trying to filter out links
13    /// that were found in unwanted tags like `<pre>` or `<code>`.
14    pub element: Option<String>,
15    /// Name of the attribute that contained the URI (e.g. `src`). This is a way
16    /// to classify links to make it easier to offer fine control over the links
17    /// that will be checked e.g. by trying to filter out links that were found
18    /// in unwanted attributes like `srcset` or `manifest`.
19    pub attribute: Option<String>,
20    /// The position of the URI in the document.
21    pub span: RawUriSpan,
22}
23
24impl Display for RawUri {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        write!(f, "{:?} (Attribute: {:?})", self.text, self.attribute)
27    }
28}
29
/// Test-only conversion from a `(text, span)` pair into a [`RawUri`].
///
/// The resulting value has neither an element nor an attribute set.
#[cfg(test)]
impl From<(&str, RawUriSpan)> for RawUri {
    fn from(pair: (&str, RawUriSpan)) -> Self {
        let (uri_text, span) = pair;
        Self {
            text: uri_text.to_owned(),
            element: None,
            attribute: None,
            span,
        }
    }
}
41
/// Location of a [`RawUri`] inside its source document.
///
/// Carrying the location along makes more precise error messages possible.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct RawUriSpan {
    /// Line on which the URI was found, counted from 1.
    pub line: NonZeroUsize,
    /// Column at which the URI was found, counted from 1, when it is known.
    ///
    /// `None` means the column could not be computed exactly, e.g. when the
    /// URI comes from the `html5ever` parser.
    pub column: Option<NonZeroUsize>,
}
58
/// Test-only shorthand that builds a [`RawUriSpan`] from a line and a column.
///
/// # Panics
///
/// Panics if `line` or `column` is `0`, since both are 1-based.
#[cfg(test)]
pub(crate) const fn span(line: usize, column: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    let column = NonZeroUsize::new(column).unwrap();
    RawUriSpan {
        line,
        column: Some(column),
    }
}
67
/// Test-only shorthand that builds a [`RawUriSpan`] carrying only a line; the column stays `None`.
///
/// # Panics
///
/// Panics if `line` is `0`, since lines are 1-based.
#[cfg(test)]
pub(crate) const fn span_line(line: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    RawUriSpan { line, column: None }
}
76
77/// A trait for calculating a [`RawUriSpan`] at a given byte offset in the document.
78///
79/// If you have a document and want spans with absolute positions, use [`SourceSpanProvider`].
80/// If you start inside a document at a given offset, use [`OffsetSpanProvider`].
81pub(crate) trait SpanProvider {
82    /// Compute the [`RawUriSpan`] at a given byte offset in the document.
83    fn span(&self, offset: usize) -> RawUriSpan;
84}
85
/// A [`SpanProvider`] that derives spans from the lines of a whole input document.
///
/// The byte offsets at which lines start are computed up front, which makes
/// constructing [`RawUriSpan`]s faster afterwards.
/// When you start inside a document at a given offset, consider using [`OffsetSpanProvider`].
#[derive(Clone, Debug)]
pub(crate) struct SourceSpanProvider<'a> {
    /// Precomputed byte offset at which each line starts, indexed by line.
    line_starts: Vec<usize>,
    /// The document itself.
    ///
    /// Kept around for column computation: a character may be more than one
    /// byte long, so byte distances alone are not enough.
    input: &'a str,
}
100
101impl<'a> SourceSpanProvider<'a> {
102    /// Create a [`SpanProvider`] from the given document.
103    ///
104    /// If the input is part of a larger document, consider using [`OffsetSpanProvider`] instead.
105    ///
106    /// This function isn't just a simple constructor but does some work, so call this only if you
107    /// want to use it.
108    pub(crate) fn from_input(input: &'a str) -> Self {
109        // FIXME: Consider making this lazy?
110        let line_starts: Vec<_> = core::iter::once(0)
111            .chain(input.match_indices('\n').map(|(i, _)| i + 1))
112            .collect();
113        Self { line_starts, input }
114    }
115}
116
117impl SpanProvider for SourceSpanProvider<'_> {
118    fn span(&self, offset: usize) -> RawUriSpan {
119        const ONE: NonZeroUsize = NonZeroUsize::MIN;
120        let line = match self.line_starts.binary_search(&offset) {
121            Ok(i) => i,
122            Err(i) => i - 1,
123        };
124        // Since we get the index by the binary_search above and subtract `1` if it would be larger
125        // than the length of the document, this shouldn't panic.
126        let line_offset = self.line_starts[line];
127        let column = self
128            .input
129            .get(line_offset..offset)
130            .or_else(|| self.input.get(line_offset..))
131            // columns are 1-based
132            .map(|v| ONE.saturating_add(v.chars().count()));
133
134        RawUriSpan {
135            // lines are 1-based
136            line: ONE.saturating_add(line),
137            column,
138        }
139    }
140}
141
142/// A [`SpanProvider`] which starts at a given offset in the document.
143///
144/// All given offsets are changed by the given amount before computing the
145/// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
146#[derive(Clone, Debug)]
147pub(crate) struct OffsetSpanProvider<'a, T: SpanProvider = SourceSpanProvider<'a>> {
148    /// The byte offset in the document by which all given offsets are changed before computing the
149    /// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
150    pub(crate) offset: usize,
151    /// The inner [`SpanProvider`] which will be used to determine the spans.
152    pub(crate) inner: &'a T,
153}
154
155impl<T: SpanProvider> SpanProvider for OffsetSpanProvider<'_, T> {
156    fn span(&self, offset: usize) -> RawUriSpan {
157        self.inner.span(self.offset + offset)
158    }
159}