miden_debug_types/
source_file.rs

1use alloc::{
2    boxed::Box,
3    string::{String, ToString},
4    sync::Arc,
5    vec::Vec,
6};
7use core::{fmt, num::NonZeroU32, ops::Range};
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use super::{FileLineCol, Position, Selection, SourceId, SourceSpan, Uri};
13
14// SOURCE LANGUAGE
15// ================================================================================================
16
/// The language a source file is written in.
///
/// The textual identifier of each variant is available through the `AsRef<str>` implementation.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SourceLanguage {
    /// Identified by the string `"masm"`
    Masm,
    /// Identified by the string `"rust"`
    Rust,
    /// Any other language, identified by the wrapped string
    Other(&'static str),
}

impl AsRef<str> for SourceLanguage {
    /// Returns the textual identifier of this language
    fn as_ref(&self) -> &str {
        let identifier: &'static str = match *self {
            SourceLanguage::Masm => "masm",
            SourceLanguage::Rust => "rust",
            SourceLanguage::Other(custom) => custom,
        };
        identifier
    }
}
33
34// SOURCE FILE
35// ================================================================================================
36
/// A [SourceFile] represents a single file stored in a [super::SourceManager]
///
/// It pairs the [SourceId] allocated for the file with the file's [SourceContent].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct SourceFile {
    /// The unique identifier allocated for this [SourceFile] by its owning [super::SourceManager]
    id: SourceId,
    /// The file content
    ///
    /// With the `serde` feature enabled, this field is deserialized via
    /// `SourceContent::deserialize_and_recompute_line_starts`, which rebuilds the line start
    /// table after decoding (that table is skipped by the serde representation of
    /// [SourceContent]).
    #[cfg_attr(
        feature = "serde",
        serde(deserialize_with = "SourceContent::deserialize_and_recompute_line_starts")
    )]
    content: SourceContent,
}
50
51impl miette::SourceCode for SourceFile {
52    fn read_span<'a>(
53        &'a self,
54        span: &miette::SourceSpan,
55        context_lines_before: usize,
56        context_lines_after: usize,
57    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
58        let mut start =
59            u32::try_from(span.offset()).map_err(|_| miette::MietteError::OutOfBounds)?;
60        let len = u32::try_from(span.len()).map_err(|_| miette::MietteError::OutOfBounds)?;
61        let mut end = start.checked_add(len).ok_or(miette::MietteError::OutOfBounds)?;
62        if context_lines_before > 0 {
63            let line_index = self.content.line_index(start.into());
64            let start_line_index = line_index.saturating_sub(context_lines_before as u32);
65            start = self.content.line_start(start_line_index).map(|idx| idx.to_u32()).unwrap_or(0);
66        }
67        if context_lines_after > 0 {
68            let line_index = self.content.line_index(end.into());
69            let end_line_index = line_index
70                .checked_add(context_lines_after as u32)
71                .ok_or(miette::MietteError::OutOfBounds)?;
72            end = self
73                .content
74                .line_range(end_line_index)
75                .map(|range| range.end.to_u32())
76                .unwrap_or_else(|| self.content.source_range().end.to_u32());
77        }
78        Ok(Box::new(ScopedSourceFileRef {
79            file: self,
80            span: miette::SourceSpan::new((start as usize).into(), end.abs_diff(start) as usize),
81        }))
82    }
83}
84
85impl SourceFile {
86    /// Create a new [SourceFile] from its raw components
87    pub fn new(id: SourceId, lang: SourceLanguage, uri: Uri, content: impl Into<Box<str>>) -> Self {
88        let content = SourceContent::new(lang, uri, content.into());
89        Self { id, content }
90    }
91
92    /// This function is intended for use by [super::SourceManager] implementations that need to
93    /// construct a [SourceFile] from its raw components (i.e. the identifier for the source file
94    /// and its content).
95    ///
96    /// Since the only entity that should be constructing a [SourceId] is a [super::SourceManager],
97    /// it is only valid to call this function in one of two scenarios:
98    ///
99    /// 1. You are a [super::SourceManager] constructing a [SourceFile] after allocating a
100    ///    [SourceId]
101    /// 2. You pass [`SourceId::default()`], i.e. [`SourceId::UNKNOWN`] for the source identifier.
102    ///    The resulting [SourceFile] will be valid and safe to use in a context where there isn't a
103    ///    [super::SourceManager] present. If there is a source manager in use, then constructing
104    ///    detached [SourceFile]s is _not_ recommended, because it will make it confusing to
105    ///    determine whether a given [SourceFile] reference is safe to use.
106    ///
107    /// You should rarely, if ever, fall in camp 2 - but it can be handy in some narrow cases
108    pub fn from_raw_parts(id: SourceId, content: SourceContent) -> Self {
109        Self { id, content }
110    }
111
112    /// Get the [SourceId] associated with this file
113    pub const fn id(&self) -> SourceId {
114        self.id
115    }
116
117    /// Get the name of this source file
118    pub fn uri(&self) -> &Uri {
119        self.content.uri()
120    }
121
122    /// Returns a reference to the underlying [SourceContent]
123    pub fn content(&self) -> &SourceContent {
124        &self.content
125    }
126
127    /// Returns a mutable reference to the underlying [SourceContent]
128    pub fn content_mut(&mut self) -> &mut SourceContent {
129        &mut self.content
130    }
131
132    /// Returns the number of lines in this file
133    pub fn line_count(&self) -> usize {
134        self.content.line_starts.len()
135    }
136
137    /// Returns the number of bytes in this file
138    pub fn len(&self) -> usize {
139        self.content.len()
140    }
141
142    /// Returns true if this file is empty
143    pub fn is_empty(&self) -> bool {
144        self.content.is_empty()
145    }
146
147    /// Get the underlying content of this file
148    #[inline(always)]
149    pub fn as_str(&self) -> &str {
150        self.content.as_str()
151    }
152
153    /// Get the underlying content of this file as a byte slice
154    #[inline(always)]
155    pub fn as_bytes(&self) -> &[u8] {
156        self.content.as_bytes()
157    }
158
159    /// Returns a [SourceSpan] covering the entirety of this file
160    #[inline]
161    pub fn source_span(&self) -> SourceSpan {
162        let range = self.content.source_range();
163        SourceSpan::new(self.id, range.start.0..range.end.0)
164    }
165
166    /// Returns a subset of the underlying content as a string slice.
167    ///
168    /// The bounds of the given span are character indices, _not_ byte indices.
169    ///
170    /// Returns `None` if the given span is out of bounds
171    #[inline(always)]
172    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
173        self.content.source_slice(span)
174    }
175
176    /// Returns a [SourceFileRef] corresponding to the bytes contained in the specified span.
177    pub fn slice(self: &Arc<Self>, span: impl Into<Range<u32>>) -> SourceFileRef {
178        SourceFileRef::new(Arc::clone(self), span)
179    }
180
181    /// Get a [SourceSpan] which points to the first byte of the character at `column` on `line`
182    ///
183    /// Returns `None` if the given line/column is out of bounds for this file.
184    pub fn line_column_to_span(
185        &self,
186        line: LineNumber,
187        column: ColumnNumber,
188    ) -> Option<SourceSpan> {
189        let offset = self.content.line_column_to_offset(line.into(), column.into())?;
190        Some(SourceSpan::at(self.id, offset.0))
191    }
192
193    /// Get a [FileLineCol] equivalent to the start of the given [SourceSpan]
194    pub fn location(&self, span: SourceSpan) -> FileLineCol {
195        assert_eq!(span.source_id(), self.id, "mismatched source ids");
196
197        self.content
198            .location(ByteIndex(span.into_range().start))
199            .expect("invalid source span: starting byte is out of bounds")
200    }
201}
202
203impl AsRef<str> for SourceFile {
204    #[inline(always)]
205    fn as_ref(&self) -> &str {
206        self.as_str()
207    }
208}
209
210impl AsRef<[u8]> for SourceFile {
211    #[inline(always)]
212    fn as_ref(&self) -> &[u8] {
213        self.as_bytes()
214    }
215}
216
217// SOURCE FILE REF
218// ================================================================================================
219
/// A reference to a specific spanned region of a [SourceFile], that provides access to the actual
/// [SourceFile], but scoped to the span it was created with.
///
/// This is useful in error types that implement [miette::Diagnostic], as it contains all of the
/// data necessary to render the source code being referenced, without a [super::SourceManager] on
/// hand.
#[derive(Debug, Clone)]
pub struct SourceFileRef {
    /// Shared handle to the file being referenced
    file: Arc<SourceFile>,
    /// The region of `file` selected by this reference
    span: SourceSpan,
}
231
232impl SourceFileRef {
233    /// Create a [SourceFileRef] from a [SourceFile] and desired span (in bytes)
234    ///
235    /// The given span will be constrained to the bytes of `file`, so a span that reaches out of
236    /// bounds will have its end bound set to the last byte of the file.
237    pub fn new(file: Arc<SourceFile>, span: impl Into<Range<u32>>) -> Self {
238        let span = span.into();
239        let end = core::cmp::min(span.end, file.len() as u32);
240        let span = SourceSpan::new(file.id(), span.start..end);
241        Self { file, span }
242    }
243
244    /// Returns a ref-counted handle to the underlying [SourceFile]
245    pub fn source_file(&self) -> Arc<SourceFile> {
246        self.file.clone()
247    }
248
249    /// Returns the URI of the file this [SourceFileRef] is selecting
250    pub fn uri(&self) -> &Uri {
251        self.file.uri()
252    }
253
254    /// Returns the [SourceSpan] selected by this [SourceFileRef]
255    pub const fn span(&self) -> SourceSpan {
256        self.span
257    }
258
259    /// Returns the underlying `str` selected by this [SourceFileRef]
260    pub fn as_str(&self) -> &str {
261        self.file.source_slice(self.span).unwrap()
262    }
263
264    /// Returns the underlying bytes selected by this [SourceFileRef]
265    #[inline]
266    pub fn as_bytes(&self) -> &[u8] {
267        self.as_str().as_bytes()
268    }
269
270    /// Returns the number of bytes represented by the subset of the underlying file that is covered
271    /// by this [SourceFileRef]
272    pub fn len(&self) -> usize {
273        self.span.len()
274    }
275
276    /// Returns true if this selection is empty
277    pub fn is_empty(&self) -> bool {
278        self.len() == 0
279    }
280}
281
282impl Eq for SourceFileRef {}
283
284impl PartialEq for SourceFileRef {
285    fn eq(&self, other: &Self) -> bool {
286        self.as_str() == other.as_str()
287    }
288}
289
290impl Ord for SourceFileRef {
291    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
292        self.as_str().cmp(other.as_str())
293    }
294}
295
296impl PartialOrd for SourceFileRef {
297    #[inline(always)]
298    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
299        Some(self.cmp(other))
300    }
301}
302
303impl core::hash::Hash for SourceFileRef {
304    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
305        self.span.hash(state);
306        self.as_str().hash(state);
307    }
308}
309
310impl AsRef<str> for SourceFileRef {
311    #[inline(always)]
312    fn as_ref(&self) -> &str {
313        self.as_str()
314    }
315}
316
317impl AsRef<[u8]> for SourceFileRef {
318    #[inline(always)]
319    fn as_ref(&self) -> &[u8] {
320        self.as_bytes()
321    }
322}
323
324impl From<&SourceFileRef> for miette::SourceSpan {
325    fn from(source: &SourceFileRef) -> Self {
326        source.span.into()
327    }
328}
329
/// Used to implement [miette::SpanContents] for [SourceFile] and [SourceFileRef]
struct ScopedSourceFileRef<'a> {
    /// The file whose content is being exposed
    file: &'a SourceFile,
    /// The region of `file` to expose, as a byte offset and length
    span: miette::SourceSpan,
}
335
336impl<'a> miette::SpanContents<'a> for ScopedSourceFileRef<'a> {
337    #[inline]
338    fn data(&self) -> &'a [u8] {
339        let start = self.span.offset();
340        let end = start + self.span.len();
341        &self.file.as_bytes()[start..end]
342    }
343
344    #[inline]
345    fn span(&self) -> &miette::SourceSpan {
346        &self.span
347    }
348
349    fn line(&self) -> usize {
350        let offset = self.span.offset() as u32;
351        self.file.content.line_index(offset.into()).to_usize()
352    }
353
354    fn column(&self) -> usize {
355        let start = self.span.offset() as u32;
356        let end = start + self.span.len() as u32;
357        let span = SourceSpan::new(self.file.id(), start..end);
358        let loc = self.file.location(span);
359        loc.column.to_index().to_usize()
360    }
361
362    #[inline]
363    fn line_count(&self) -> usize {
364        self.file.line_count()
365    }
366
367    #[inline]
368    fn name(&self) -> Option<&str> {
369        Some(self.file.uri().as_ref())
370    }
371
372    #[inline]
373    fn language(&self) -> Option<&str> {
374        None
375    }
376}
377
378impl miette::SourceCode for SourceFileRef {
379    #[inline]
380    fn read_span<'a>(
381        &'a self,
382        span: &miette::SourceSpan,
383        context_lines_before: usize,
384        context_lines_after: usize,
385    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
386        self.file.read_span(span, context_lines_before, context_lines_after)
387    }
388}
389
390// SOURCE CONTENT
391// ================================================================================================
392
/// Represents key information about a source file and its content:
///
/// * The language identifier of the file
/// * The path to the file (or its name, in the case of virtual files)
/// * The content of the file
/// * The byte offsets of every line in the file, for use in looking up line/column information
/// * The version of the document
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct SourceContent {
    /// The language identifier for this source file
    language: Box<str>,
    /// The path (or name) of this file
    uri: Uri,
    /// The underlying content of this file
    content: String,
    /// The byte offsets for each line in this file
    ///
    /// This table is skipped by the serde representation, and is left empty (its default) by the
    /// derived `Deserialize` implementation.
    #[cfg_attr(feature = "serde", serde(default, skip))]
    line_starts: Vec<ByteIndex>,
    /// The document version
    #[cfg_attr(feature = "serde", serde(default))]
    version: i32,
}
414
415impl fmt::Debug for SourceContent {
416    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
417        let Self {
418            language,
419            uri,
420            content,
421            line_starts,
422            version,
423        } = self;
424        f.debug_struct("SourceContent")
425            .field("version", version)
426            .field("language", language)
427            .field("uri", uri)
428            .field("size_in_bytes", &content.len())
429            .field("line_count", &line_starts.len())
430            .field("content", content)
431            .finish()
432    }
433}
434
435impl Eq for SourceContent {}
436
437impl PartialEq for SourceContent {
438    #[inline]
439    fn eq(&self, other: &Self) -> bool {
440        self.language == other.language && self.uri == other.uri && self.content == other.content
441    }
442}
443
444impl Ord for SourceContent {
445    #[inline]
446    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
447        self.uri.cmp(&other.uri).then_with(|| self.content.cmp(&other.content))
448    }
449}
450
451impl PartialOrd for SourceContent {
452    #[inline]
453    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
454        Some(self.cmp(other))
455    }
456}
457
458impl core::hash::Hash for SourceContent {
459    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
460        self.language.hash(state);
461        self.uri.hash(state);
462        self.content.hash(state);
463    }
464}
465
/// The error produced when an update to a [SourceContent] refers to a selection that cannot be
/// resolved against the current content.
#[derive(Debug, thiserror::Error)]
pub enum SourceContentUpdateError {
    /// The start position of the selection could not be resolved; carries that position
    #[error("invalid content selection: start position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionStart(Position),
    /// The end position of the selection could not be resolved; carries the reported position
    #[error("invalid content selection: end position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionEnd(Position),
}
473
474impl SourceContent {
475    /// Create a new [SourceContent] from the (possibly virtual) file path, and its content as a
476    /// UTF-8 string.
477    ///
478    /// When created, the line starts for this file will be computed, which requires scanning the
479    /// file content once.
480    pub fn new(language: impl AsRef<str>, uri: impl Into<Uri>, content: impl Into<String>) -> Self {
481        let language = language.as_ref().to_string().into_boxed_str();
482        let content: String = content.into();
483        let bytes = content.as_bytes();
484
485        assert!(
486            bytes.len() < u32::MAX as usize,
487            "unsupported source file: current maximum supported length in bytes is 2^32"
488        );
489
490        let line_starts = compute_line_starts(&content, None);
491
492        Self {
493            language,
494            uri: uri.into(),
495            content,
496            line_starts,
497            version: 0,
498        }
499    }
500
501    /// Get the language identifier of this source file
502    pub fn language(&self) -> &str {
503        &self.language
504    }
505
506    /// Get the current version of this source file's content
507    pub fn version(&self) -> i32 {
508        self.version
509    }
510
511    /// Set the current version of this content
512    #[inline(always)]
513    pub fn set_version(&mut self, version: i32) {
514        self.version = version;
515    }
516
517    /// Get the URI of this source file
518    #[inline]
519    pub fn uri(&self) -> &Uri {
520        &self.uri
521    }
522
523    /// Returns the underlying content as a string slice
524    #[inline(always)]
525    pub fn as_str(&self) -> &str {
526        self.content.as_ref()
527    }
528
529    /// Returns the underlying content as a byte slice
530    #[inline(always)]
531    pub fn as_bytes(&self) -> &[u8] {
532        self.content.as_bytes()
533    }
534
535    /// Returns the size in bytes of the underlying content
536    #[inline(always)]
537    pub fn len(&self) -> usize {
538        self.content.len()
539    }
540
541    /// Returns true if the underlying content is empty
542    #[inline(always)]
543    pub fn is_empty(&self) -> bool {
544        self.content.is_empty()
545    }
546
547    /// Returns the range of valid byte indices for this file
548    #[inline]
549    pub fn source_range(&self) -> Range<ByteIndex> {
550        ByteIndex(0)..ByteIndex(self.content.len() as u32)
551    }
552
553    /// Returns a subset of the underlying content as a string slice.
554    ///
555    /// The bounds of the given span are character indices, _not_ byte indices.
556    ///
557    /// Returns `None` if the given span is out of bounds
558    #[inline(always)]
559    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
560        self.as_str().get(span.into())
561    }
562
563    /// Returns a subset of the underlying content as a byte slice.
564    ///
565    /// Returns `None` if the given span is out of bounds
566    #[inline(always)]
567    pub fn byte_slice(&self, span: impl Into<Range<ByteIndex>>) -> Option<&[u8]> {
568        let Range { start, end } = span.into();
569        self.as_bytes().get(start.to_usize()..end.to_usize())
570    }
571
572    /// Like [Self::source_slice], but the slice is computed like a selection in an editor, i.e.
573    /// based on line/column positions, rather than raw character indices.
574    ///
575    /// This is useful when mapping LSP operations to content in the source file.
576    pub fn select(&self, mut range: Selection) -> Option<&str> {
577        range.canonicalize();
578
579        let start = self.line_column_to_offset(range.start.line, range.start.character)?;
580        let end = self.line_column_to_offset(range.end.line, range.end.character)?;
581
582        Some(&self.as_str()[start.to_usize()..end.to_usize()])
583    }
584
585    /// Returns the number of lines in the source content
586    pub fn line_count(&self) -> usize {
587        self.line_starts.len()
588    }
589
590    /// Returns the byte index at which the line corresponding to `line_index` starts
591    ///
592    /// Returns `None` if the given index is out of bounds
593    pub fn line_start(&self, line_index: LineIndex) -> Option<ByteIndex> {
594        self.line_starts.get(line_index.to_usize()).copied()
595    }
596
597    /// Returns the index of the last line in this file
598    pub fn last_line_index(&self) -> LineIndex {
599        LineIndex(self.line_count().saturating_sub(1).try_into().expect("too many lines in file"))
600    }
601
602    /// Get the range of byte indices covered by the given line
603    pub fn line_range(&self, line_index: LineIndex) -> Option<Range<ByteIndex>> {
604        let line_start = self.line_start(line_index)?;
605        match self.line_start(line_index + 1) {
606            Some(line_end) => Some(line_start..line_end),
607            None => Some(line_start..ByteIndex(self.content.len() as u32)),
608        }
609    }
610
611    /// Get the index of the line to which `byte_index` belongs
612    pub fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
613        match self.line_starts.binary_search(&byte_index) {
614            Ok(line) => LineIndex(line as u32),
615            Err(next_line) => LineIndex(next_line as u32 - 1),
616        }
617    }
618
619    /// Get the [ByteIndex] corresponding to the given line and column indices.
620    ///
621    /// Returns `None` if the line or column indices are out of bounds.
622    pub fn line_column_to_offset(
623        &self,
624        line_index: LineIndex,
625        column_index: ColumnIndex,
626    ) -> Option<ByteIndex> {
627        let column_index = column_index.to_usize();
628        let line_span = self.line_range(line_index)?;
629        let line_src = self
630            .content
631            .get(line_span.start.to_usize()..line_span.end.to_usize())
632            .expect("invalid line boundaries: invalid utf-8");
633        if line_src.len() < column_index {
634            return None;
635        }
636        let (pre, _) = line_src.split_at(column_index);
637        let start = line_span.start;
638        Some(start + ByteOffset::from_str_len(pre))
639    }
640
641    /// Get a [FileLineCol] corresponding to the line/column in this file at which `byte_index`
642    /// occurs
643    pub fn location(&self, byte_index: ByteIndex) -> Option<FileLineCol> {
644        let line_index = self.line_index(byte_index);
645        let line_start_index = self.line_start(line_index)?;
646        let line_src = self.content.get(line_start_index.to_usize()..byte_index.to_usize())?;
647        let column_index = ColumnIndex::from(line_src.chars().count() as u32);
648        Some(FileLineCol {
649            uri: self.uri.clone(),
650            line: line_index.number(),
651            column: column_index.number(),
652        })
653    }
654
655    /// Update the source document after being notified of a change event.
656    ///
657    /// The `version` indicates the new version of the document
658    ///
659    /// NOTE: This is intended to update a [super::SourceManager]'s view of the content of the
660    /// document, _not_ to perform an update against the actual file, wherever it may be.
661    pub fn update(
662        &mut self,
663        text: String,
664        range: Option<Selection>,
665        version: i32,
666    ) -> Result<(), SourceContentUpdateError> {
667        match range {
668            Some(range) => {
669                let start = self
670                    .line_column_to_offset(range.start.line, range.start.character)
671                    .ok_or(SourceContentUpdateError::InvalidSelectionStart(range.start))?
672                    .to_usize();
673                let end = self
674                    .line_column_to_offset(range.end.line, range.end.character)
675                    .ok_or(SourceContentUpdateError::InvalidSelectionEnd(range.start))?
676                    .to_usize();
677                assert!(start <= end, "start of range must be less than end, got {start}..{end}",);
678                self.content.replace_range(start..end, &text);
679
680                let added_line_starts = compute_line_starts(&text, Some(start as u32));
681                let num_added = added_line_starts.len();
682                let splice_start = range.start.line.to_usize() + 1;
683                let splice_end =
684                    core::cmp::min(range.end.line.to_usize(), self.line_starts.len() - 1);
685                self.line_starts.splice(splice_start..=splice_end, added_line_starts);
686
687                let diff =
688                    (text.len() as i32).saturating_sub_unsigned((end as u32) - (start as u32));
689                if diff != 0 {
690                    for i in (splice_start + num_added)..self.line_starts.len() {
691                        self.line_starts[i] =
692                            ByteIndex(self.line_starts[i].to_u32().saturating_add_signed(diff));
693                    }
694                }
695            },
696            None => {
697                self.line_starts = compute_line_starts(&text, None);
698                self.content = text;
699            },
700        }
701
702        self.version = version;
703
704        Ok(())
705    }
706}
707
#[cfg(feature = "serde")]
impl SourceContent {
    /// Deserializes a [SourceContent], then rebuilds its line start table by scanning the decoded
    /// content (the table itself is not part of the serialized form).
    fn deserialize_and_recompute_line_starts<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        SourceContent::deserialize(deserializer).map(|mut decoded| {
            decoded.line_starts = compute_line_starts(&decoded.content, None);
            decoded
        })
    }
}
719
720fn compute_line_starts(text: &str, text_offset: Option<u32>) -> Vec<ByteIndex> {
721    let bytes = text.as_bytes();
722    let initial_line_offset = match text_offset {
723        Some(_) => None,
724        None => Some(ByteIndex(0)),
725    };
726    let text_offset = text_offset.unwrap_or(0);
727    initial_line_offset
728        .into_iter()
729        .chain(memchr::memchr_iter(b'\n', bytes).filter_map(|mut offset| {
730            // Determine if the newline has any preceding escapes
731            let mut preceding_escapes = 0;
732            let line_start = offset + 1;
733            while let Some(prev_offset) = offset.checked_sub(1) {
734                if bytes[prev_offset] == b'\\' {
735                    offset = prev_offset;
736                    preceding_escapes += 1;
737                    continue;
738                }
739                break;
740            }
741
742            // If the newline is escaped, do not count it as a new line
743            let is_escaped = preceding_escapes > 0 && preceding_escapes % 2 != 0;
744            if is_escaped {
745                None
746            } else {
747                Some(ByteIndex(text_offset + line_start as u32))
748            }
749        }))
750        .collect()
751}
752
753// SOURCE CONTENT INDICES
754// ================================================================================================
755
756/// An index representing the offset in bytes from the start of a source file
757#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
758#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
759#[cfg_attr(feature = "serde", serde(transparent))]
760pub struct ByteIndex(pub u32);
761
762impl ByteIndex {
763    /// Create a [ByteIndex] from a raw `u32` index
764    pub const fn new(index: u32) -> Self {
765        Self(index)
766    }
767
768    /// Get the raw index as a usize
769    #[inline(always)]
770    pub const fn to_usize(self) -> usize {
771        self.0 as usize
772    }
773
774    /// Get the raw index as a u32
775    #[inline(always)]
776    pub const fn to_u32(self) -> u32 {
777        self.0
778    }
779}
780
781impl core::ops::Add<ByteOffset> for ByteIndex {
782    type Output = ByteIndex;
783
784    fn add(self, rhs: ByteOffset) -> Self {
785        Self((self.0 as i64 + rhs.0) as u32)
786    }
787}
788
789impl core::ops::Add<u32> for ByteIndex {
790    type Output = ByteIndex;
791
792    fn add(self, rhs: u32) -> Self {
793        Self(self.0 + rhs)
794    }
795}
796
797impl core::ops::AddAssign<ByteOffset> for ByteIndex {
798    fn add_assign(&mut self, rhs: ByteOffset) {
799        *self = *self + rhs;
800    }
801}
802
803impl core::ops::AddAssign<u32> for ByteIndex {
804    fn add_assign(&mut self, rhs: u32) {
805        self.0 += rhs;
806    }
807}
808
809impl core::ops::Sub<ByteOffset> for ByteIndex {
810    type Output = ByteIndex;
811
812    fn sub(self, rhs: ByteOffset) -> Self {
813        Self((self.0 as i64 - rhs.0) as u32)
814    }
815}
816
817impl core::ops::Sub<u32> for ByteIndex {
818    type Output = ByteIndex;
819
820    fn sub(self, rhs: u32) -> Self {
821        Self(self.0 - rhs)
822    }
823}
824
825impl core::ops::SubAssign<ByteOffset> for ByteIndex {
826    fn sub_assign(&mut self, rhs: ByteOffset) {
827        *self = *self - rhs;
828    }
829}
830
831impl core::ops::SubAssign<u32> for ByteIndex {
832    fn sub_assign(&mut self, rhs: u32) {
833        self.0 -= rhs;
834    }
835}
836
837impl From<u32> for ByteIndex {
838    fn from(index: u32) -> Self {
839        Self(index)
840    }
841}
842
843impl From<ByteIndex> for u32 {
844    fn from(index: ByteIndex) -> Self {
845        index.0
846    }
847}
848
849impl fmt::Display for ByteIndex {
850    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
851        fmt::Display::fmt(&self.0, f)
852    }
853}
854
/// A signed distance in bytes, relative to some [ByteIndex]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(i64);

impl ByteOffset {
    /// The number of bytes `c` occupies when encoded as UTF-8
    pub fn from_char_len(c: char) -> ByteOffset {
        let width = c.len_utf8();
        ByteOffset(width as i64)
    }

    /// The number of bytes occupied by `s`
    pub fn from_str_len(s: &str) -> ByteOffset {
        let width = s.len();
        ByteOffset(width as i64)
    }
}

impl core::ops::Add for ByteOffset {
    type Output = ByteOffset;

    /// Sums two offsets
    fn add(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs + rhs)
    }
}

impl core::ops::AddAssign for ByteOffset {
    /// In-place version of addition
    fn add_assign(&mut self, rhs: Self) {
        self.0 = self.0 + rhs.0;
    }
}

impl core::ops::Sub for ByteOffset {
    type Output = ByteOffset;

    /// Computes the difference of two offsets
    fn sub(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs - rhs)
    }
}

impl core::ops::SubAssign for ByteOffset {
    /// In-place version of subtraction
    fn sub_assign(&mut self, rhs: Self) {
        self.0 = self.0 - rhs.0;
    }
}

impl fmt::Display for ByteOffset {
    /// Formats the offset exactly as the underlying `i64` would be formatted
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <i64 as fmt::Display>::fmt(&self.0, f)
    }
}
904
/// Declares a pair of newtypes for one component of a source position: a zero-indexed type
/// wrapping a `u32`, and a one-indexed type wrapping a [NonZeroU32], together with conversions
/// between them, checked/saturating arithmetic, arithmetic operators, and [fmt::Display]
/// implementations.
///
/// The two-argument form derives both type names from `$name` by appending `Index` and `Number`
/// (via the `paste` crate); the three-argument form takes both type names explicitly.
macro_rules! declare_dual_number_and_index_type {
    ($name:ident, $description:literal) => {
        paste::paste! {
            declare_dual_number_and_index_type!([<$name Index>], [<$name Number>], $description);
        }
    };

    ($index_name:ident, $number_name:ident, $description:literal) => {
        #[doc = concat!("A zero-indexed ", $description, " number")]
        #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $index_name(pub u32);

        impl $index_name {
            #[doc = concat!("Convert to a [", stringify!($number_name), "]")]
            ///
            /// # Panics
            ///
            /// Panics if this index is `u32::MAX`, as the corresponding one-indexed number is not
            /// representable.
            pub const fn number(self) -> $number_name {
                // The field is public, so `u32::MAX` is reachable. `wrapping_add` yields zero
                // only in that case, which `NonZeroU32::new` rejects; no unchecked construction
                // of a zero value can occur, regardless of build profile.
                match NonZeroU32::new(self.0.wrapping_add(1)) {
                    Some(number) => $number_name(number),
                    None => panic!("invalid index: overflow occurred"),
                }
            }

            /// Get the raw index value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0 as usize
            }

            /// Get the raw index value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0
            }

            /// Add `offset` to this index, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this index, returning `None` on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.checked_add_signed(offset).map(Self)
            }

            /// Subtract `offset` from this index, returning `None` on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.checked_sub(offset).map(Self)
            }

            /// Add `offset` to this index, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this index, saturating to `0` on underflow, and `u32::MAX`
            /// on overflow.
            pub const fn saturating_add_signed(self, offset: i32) -> Self {
                Self(self.0.saturating_add_signed(offset))
            }

            /// Subtract `offset` from this index, saturating to `0` on underflow
            pub const fn saturating_sub(self, offset: u32) -> Self {
                Self(self.0.saturating_sub(offset))
            }
        }

        impl From<u32> for $index_name {
            #[inline]
            fn from(index: u32) -> Self {
                Self(index)
            }
        }

        impl From<$number_name> for $index_name {
            #[inline]
            fn from(index: $number_name) -> Self {
                // Cannot underflow: the one-indexed value is never zero.
                Self(index.to_u32() - 1)
            }
        }

        impl core::ops::Add<u32> for $index_name {
            type Output = Self;

            /// Panics if the result would exceed `u32::MAX`, in every build profile.
            #[inline]
            fn add(self, rhs: u32) -> Self {
                self.checked_add(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<u32> for $index_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $index_name {
            type Output = Self;

            /// Panics if the result would fall outside the range of `u32`.
            fn add(self, rhs: i32) -> Self {
                self.checked_add_signed(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $index_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $index_name {
            type Output = Self;

            /// Panics if `rhs` is greater than this index, in every build profile.
            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.checked_sub(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $index_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $index_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }

        #[doc = concat!("A one-indexed ", $description, " number")]
        #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $number_name(NonZeroU32);

        impl Default for $number_name {
            /// The default number is `1`, i.e. the first valid one-indexed value.
            fn default() -> Self {
                // SAFETY: the literal `1` is non-zero.
                Self(unsafe { NonZeroU32::new_unchecked(1) })
            }
        }

        impl $number_name {
            /// Create a new number from a raw `u32`, returning `None` if `number` is zero
            pub const fn new(number: u32) -> Option<Self> {
                match NonZeroU32::new(number) {
                    Some(num) => Some(Self(num)),
                    None => None,
                }
            }

            #[doc = concat!("Convert to a [", stringify!($index_name), "]")]
            pub const fn to_index(self) -> $index_name {
                $index_name(self.to_u32().saturating_sub(1))
            }

            /// Get the raw value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0.get() as usize
            }

            /// Get the raw value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0.get()
            }

            /// Add `offset` to this number, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this number, returning `None` if the result would be
            /// less than `1` or greater than `u32::MAX`
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.get().checked_add_signed(offset).and_then(Self::new)
            }

            /// Subtract `offset` from this number, returning `None` if the result would be less
            /// than `1`
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.get().checked_sub(offset).and_then(Self::new)
            }

            /// Add `offset` to this number, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                // Saturating addition on a non-zero value can never produce zero, so the safe
                // `NonZeroU32` operation is used instead of an unchecked constructor.
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this number, saturating to `1` on underflow, and
            /// `u32::MAX` on overflow.
            pub fn saturating_add_signed(self, offset: i32) -> Self {
                // A raw result of zero is not representable; fall back to the default of `1`.
                Self::new(self.to_u32().saturating_add_signed(offset)).unwrap_or_default()
            }

            /// Subtract `offset` from this number, saturating to `1` on underflow
            pub fn saturating_sub(self, offset: u32) -> Self {
                // A raw result of zero is not representable; fall back to the default of `1`.
                Self::new(self.to_u32().saturating_sub(offset)).unwrap_or_default()
            }
        }

        impl From<NonZeroU32> for $number_name {
            #[inline]
            fn from(index: NonZeroU32) -> Self {
                Self(index)
            }
        }

        impl From<$index_name> for $number_name {
            /// Panics if `index` is `u32::MAX`, as no one-indexed equivalent exists.
            #[inline]
            fn from(index: $index_name) -> Self {
                index.number()
            }
        }

        impl core::ops::Add<u32> for $number_name {
            type Output = Self;

            /// Panics if the result would exceed `u32::MAX`, in every build profile.
            #[inline]
            fn add(self, rhs: u32) -> Self {
                self.checked_add(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<u32> for $number_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $number_name {
            type Output = Self;

            /// Panics if the result would be less than `1` or greater than `u32::MAX`.
            fn add(self, rhs: i32) -> Self {
                self.to_u32()
                    .checked_add_signed(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $number_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $number_name {
            type Output = Self;

            /// Panics if the result would be less than `1`.
            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.to_u32()
                    .checked_sub(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $number_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $number_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
1179
// Generate the `LineIndex`/`LineNumber` and `ColumnIndex`/`ColumnNumber` type pairs; the
// shorthand form of the macro appends the `Index` and `Number` suffixes to the given name.
declare_dual_number_and_index_type!(Line, "line");
declare_dual_number_and_index_type!(Column, "column");
1182
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the bytes spanned by line `$line` of `$source` equal those of `$expected`.
    macro_rules! assert_line_bytes {
        ($source:expr, $line:expr, $expected:expr) => {
            assert_eq!(
                $source
                    .byte_slice($source.line_range($line).expect("invalid line"))
                    .expect("invalid byte range"),
                $expected.as_bytes()
            )
        };
    }

    /// Line boundaries are computed on construction; the trailing newline produces a final,
    /// empty line.
    #[test]
    fn source_content_line_starts() {
        const ORIGINAL: &str = "\
begin
  push.1
  push.2
  add
end
";
        let source = SourceContent::new("masm", "foo.masm", ORIGINAL);

        assert_eq!(source.line_count(), 6);
        assert_line_bytes!(source, LineIndex(0), "begin\n");
        assert_line_bytes!(source, LineIndex(1), "  push.1\n");
        assert_line_bytes!(source, source.last_line_index(), "");
    }

    /// Line boundaries are recomputed after replacing a selection of lines with new text.
    #[test]
    fn source_content_line_starts_after_update() {
        const ORIGINAL: &str = "\
begin
  push.1
  push.2
  add
end
";
        const REPLACEMENT: &str = "  push.2
  mul
end
";
        let mut source = SourceContent::new("masm", "foo.masm", ORIGINAL);
        let replaced_lines = Selection::from(LineIndex(4)..LineIndex(5));
        source
            .update(REPLACEMENT.to_string(), Some(replaced_lines), 1)
            .expect("update failed");

        assert_eq!(
            source.as_str(),
            "\
begin
  push.1
  push.2
  add
  push.2
  mul
end
"
        );
        assert_eq!(source.line_count(), 8);
        assert_line_bytes!(source, LineIndex(0), "begin\n");
        assert_line_bytes!(source, LineIndex(3), "  add\n");
        assert_line_bytes!(source, LineIndex(4), "  push.2\n");
        assert_line_bytes!(source, source.last_line_index(), "");
    }
}