Skip to main content

bibtex_parser/
source.rs

1//! Source identity and byte-to-line location utilities.
2
3use crate::{SourceId, SourceSpan};
4use std::borrow::Cow;
5
6/// A line-indexed view of a parsed source.
7///
8/// Byte offsets are zero-based UTF-8 offsets. Lines and columns are one-based,
9/// and columns count Unicode scalar values, not bytes. End positions point to
10/// the location immediately after the covered byte range.
11#[derive(Debug, Clone)]
12pub struct SourceMap<'a> {
13    source: Option<SourceId>,
14    name: Option<Cow<'a, str>>,
15    input: &'a str,
16    line_starts: Vec<usize>,
17}
18
19impl<'a> SourceMap<'a> {
20    /// Create an anonymous source map.
21    #[must_use]
22    pub fn anonymous(input: &'a str) -> Self {
23        Self::new(None, None, input)
24    }
25
26    /// Create a source map with a document-local source identifier and optional name.
27    #[must_use]
28    pub fn new(source: Option<SourceId>, name: Option<Cow<'a, str>>, input: &'a str) -> Self {
29        let mut line_starts = Vec::new();
30        line_starts.push(0);
31        for (index, byte) in input.bytes().enumerate() {
32            if byte == b'\n' {
33                line_starts.push(index + 1);
34            }
35        }
36
37        Self {
38            source,
39            name,
40            input,
41            line_starts,
42        }
43    }
44
45    /// Return this source's identifier, when it has one.
46    #[must_use]
47    pub const fn source_id(&self) -> Option<SourceId> {
48        self.source
49    }
50
51    /// Return this source's caller-provided name.
52    #[must_use]
53    pub fn name(&self) -> Option<&str> {
54        self.name.as_deref()
55    }
56
57    /// Return the number of bytes in the source.
58    #[must_use]
59    pub const fn len(&self) -> usize {
60        self.input.len()
61    }
62
63    /// Return the underlying source text.
64    #[must_use]
65    pub const fn input(&self) -> &'a str {
66        self.input
67    }
68
69    /// Return true when this source is empty.
70    #[must_use]
71    pub const fn is_empty(&self) -> bool {
72        self.input.is_empty()
73    }
74
75    /// Return the line and column for a byte offset.
76    ///
77    /// Offsets past end-of-file are clamped to the end of the source.
78    #[must_use]
79    pub fn line_column(&self, byte: usize) -> (usize, usize) {
80        let byte = byte.min(self.input.len());
81        let line_index = match self.line_starts.binary_search(&byte) {
82            Ok(index) => index,
83            Err(0) => 0,
84            Err(index) => index - 1,
85        };
86        let line_start = self.line_starts[line_index];
87        let column = self.input[line_start..byte].chars().count() + 1;
88        (line_index + 1, column)
89    }
90
91    /// Return the byte offset for a one-based line and column.
92    ///
93    /// Columns count Unicode scalar values. A column one past the end of the
94    /// line resolves to the line-end byte offset.
95    #[must_use]
96    pub fn byte_at_line_column(&self, line: usize, column: usize) -> Option<usize> {
97        if line == 0 || column == 0 {
98            return None;
99        }
100        let line_start = *self.line_starts.get(line - 1)?;
101        let line_end = self
102            .line_starts
103            .get(line)
104            .map_or(self.input.len(), |next| next.saturating_sub(1));
105        let line_text = self.input.get(line_start..line_end)?;
106        if column == 1 {
107            return Some(line_start);
108        }
109        let mut current_column = 1usize;
110        for (offset, _) in line_text.char_indices() {
111            if current_column == column {
112                return Some(line_start + offset);
113            }
114            current_column += 1;
115        }
116        if current_column == column {
117            Some(line_end)
118        } else {
119            None
120        }
121    }
122
123    /// Create a source span for a byte range.
124    ///
125    /// The range is clamped to the source length. The returned span keeps the
126    /// half-open byte offsets and one-based start/end line-column positions.
127    #[must_use]
128    pub fn span(&self, byte_start: usize, byte_end: usize) -> SourceSpan {
129        let byte_start = byte_start.min(self.input.len());
130        let byte_end = byte_end.min(self.input.len()).max(byte_start);
131        let (line, column) = self.line_column(byte_start);
132        let (end_line, end_column) = self.line_column(byte_end);
133        let span = SourceSpan::with_end(byte_start, byte_end, line, column, end_line, end_column);
134        self.source.map_or(span, |source| span.with_source(source))
135    }
136
137    /// Return a borrowed slice for a source span when it belongs to this source.
138    #[must_use]
139    pub fn slice(&self, span: SourceSpan) -> Option<&'a str> {
140        if span.source.is_some() && span.source != self.source {
141            return None;
142        }
143        self.input.get(span.byte_start..span.byte_end)
144    }
145
146    /// Return a short line-oriented snippet for a span.
147    #[must_use]
148    pub fn snippet(&self, span: SourceSpan, max_chars: usize) -> Option<String> {
149        if span.source.is_some() && span.source != self.source {
150            return None;
151        }
152
153        let anchor_start = if span.is_empty() && span.byte_start > 0 {
154            span.byte_start - 1
155        } else {
156            span.byte_start
157        };
158        let anchor_end = if span.is_empty() && span.byte_end > 0 {
159            span.byte_end - 1
160        } else {
161            span.byte_end
162        };
163
164        let start = self.input[..anchor_start]
165            .rfind('\n')
166            .map_or(0, |index| index + 1);
167        let end = self.input[anchor_end..]
168            .find('\n')
169            .map_or(self.input.len(), |index| anchor_end + index);
170        let snippet = self.input.get(start..end)?;
171
172        if snippet.chars().count() <= max_chars {
173            return Some(snippet.to_string());
174        }
175
176        Some(snippet.chars().take(max_chars).collect())
177    }
178}