lychee_lib/types/uri/raw.rs
1use std::{fmt::Display, num::NonZeroUsize};
2
3/// A raw URI that got extracted from a document with a fuzzy parser.
4/// Note that this can still be invalid according to stricter URI standards
5#[derive(Clone, Debug, PartialEq, Eq, Hash)]
6pub struct RawUri {
7 /// Unparsed URI represented as a `String`. There is no guarantee that it
8 /// can be parsed into a URI object
9 pub text: String,
10 /// Name of the element that contained the URI (e.g. `a` for the <a> tag).
11 /// This is a way to classify links to make it easier to offer fine control
12 /// over the links that will be checked e.g. by trying to filter out links
13 /// that were found in unwanted tags like `<pre>` or `<code>`.
14 pub element: Option<String>,
15 /// Name of the attribute that contained the URI (e.g. `src`). This is a way
16 /// to classify links to make it easier to offer fine control over the links
17 /// that will be checked e.g. by trying to filter out links that were found
18 /// in unwanted attributes like `srcset` or `manifest`.
19 pub attribute: Option<String>,
20 /// The position of the URI in the document.
21 pub span: RawUriSpan,
22}
23
24impl Display for RawUri {
25 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 write!(f, "{:?} (Attribute: {:?})", self.text, self.attribute)
27 }
28}
29
/// Test-only shortcut: build a [`RawUri`] from its text and span, leaving the
/// element and attribute unset.
#[cfg(test)]
impl From<(&str, RawUriSpan)> for RawUri {
    fn from(pair: (&str, RawUriSpan)) -> Self {
        let (text, span) = pair;
        Self {
            text: text.to_owned(),
            element: None,
            attribute: None,
            span,
        }
    }
}
41
/// The position of a [`RawUri`] inside its document.
///
/// Having the position allows error messages to be more precise.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct RawUriSpan {
    /// The 1-based line on which the URI is located.
    pub line: NonZeroUsize,
    /// The 1-based column of the URI, if it can be computed.
    ///
    /// This is `None` when the column can't be determined exactly,
    /// e.g. when the URI comes from the `html5ever` parser.
    pub column: Option<NonZeroUsize>,
}
58
/// Test helper that builds a [`RawUriSpan`] from a 1-based line and column.
///
/// Panics if either `line` or `column` is zero.
#[cfg(test)]
pub(crate) const fn span(line: usize, column: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    let column = NonZeroUsize::new(column).unwrap();
    RawUriSpan {
        line,
        column: Some(column),
    }
}
67
/// Test helper that builds a [`RawUriSpan`] from a 1-based line only; the column stays `None`.
///
/// Panics if `line` is zero.
#[cfg(test)]
pub(crate) const fn span_line(line: usize) -> RawUriSpan {
    let line = NonZeroUsize::new(line).unwrap();
    RawUriSpan { line, column: None }
}
76
77/// A trait for calculating a [`RawUriSpan`] at a given byte offset in the document.
78///
79/// If you have a document and want spans with absolute positions, use [`SourceSpanProvider`].
80/// If you start inside a document at a given offset, use [`OffsetSpanProvider`].
81pub(crate) trait SpanProvider {
82 /// Compute the [`RawUriSpan`] at a given byte offset in the document.
83 fn span(&self, offset: usize) -> RawUriSpan;
84}
85
/// A [`SpanProvider`] that derives spans from the lines of the input.
///
/// Line start offsets are precomputed so that building [`RawUriSpan`]s is faster.
/// When starting at a given offset inside a document, consider [`OffsetSpanProvider`].
#[derive(Debug, Clone)]
pub(crate) struct SourceSpanProvider<'a> {
    /// Byte offset at which each line starts, indexed by 0-based line number.
    line_starts: Vec<usize>,
    /// The input document.
    ///
    /// Kept around to compute column information, because a character is not guaranteed to
    /// be a single byte long.
    input: &'a str,
}
100
101impl<'a> SourceSpanProvider<'a> {
102 /// Create a [`SpanProvider`] from the given document.
103 ///
104 /// If the input is part of a larger document, consider using [`OffsetSpanProvider`] instead.
105 ///
106 /// This function isn't just a simple constructor but does some work, so call this only if you
107 /// want to use it.
108 pub(crate) fn from_input(input: &'a str) -> Self {
109 // FIXME: Consider making this lazy?
110 let line_starts: Vec<_> = core::iter::once(0)
111 .chain(input.match_indices('\n').map(|(i, _)| i + 1))
112 .collect();
113 Self { line_starts, input }
114 }
115}
116
117impl SpanProvider for SourceSpanProvider<'_> {
118 fn span(&self, offset: usize) -> RawUriSpan {
119 const ONE: NonZeroUsize = NonZeroUsize::MIN;
120 let line = match self.line_starts.binary_search(&offset) {
121 Ok(i) => i,
122 Err(i) => i - 1,
123 };
124 // Since we get the index by the binary_search above and subtract `1` if it would be larger
125 // than the length of the document, this shouldn't panic.
126 let line_offset = self.line_starts[line];
127 let column = self
128 .input
129 .get(line_offset..offset)
130 .or_else(|| self.input.get(line_offset..))
131 // columns are 1-based
132 .map(|v| ONE.saturating_add(v.chars().count()));
133
134 RawUriSpan {
135 // lines are 1-based
136 line: ONE.saturating_add(line),
137 column,
138 }
139 }
140}
141
142/// A [`SpanProvider`] which starts at a given offset in the document.
143///
144/// All given offsets are changed by the given amount before computing the
145/// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
146#[derive(Clone, Debug)]
147pub(crate) struct OffsetSpanProvider<'a, T: SpanProvider = SourceSpanProvider<'a>> {
148 /// The byte offset in the document by which all given offsets are changed before computing the
149 /// resulting [`RawUriSpan`] with the inner [`SpanProvider`].
150 pub(crate) offset: usize,
151 /// The inner [`SpanProvider`] which will be used to determine the spans.
152 pub(crate) inner: &'a T,
153}
154
155impl<T: SpanProvider> SpanProvider for OffsetSpanProvider<'_, T> {
156 fn span(&self, offset: usize) -> RawUriSpan {
157 self.inner.span(self.offset + offset)
158 }
159}