miden_debug_types/
source_file.rs

1use alloc::{
2    boxed::Box,
3    string::{String, ToString},
4    sync::Arc,
5    vec::Vec,
6};
7use core::{fmt, num::NonZeroU32, ops::Range};
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use super::{FileLineCol, Position, Selection, SourceId, SourceSpan, Uri};
13
14// SOURCE LANGUAGE
15// ================================================================================================
16
/// Identifies the language a source file is written in.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SourceLanguage {
    /// The language identified by the string `masm`
    Masm,
    /// The language identified by the string `rust`
    Rust,
    /// Any other language, identified by the wrapped string
    Other(&'static str),
}

impl AsRef<str> for SourceLanguage {
    /// Returns the string identifier of this language
    fn as_ref(&self) -> &str {
        match *self {
            Self::Other(name) => name,
            Self::Rust => "rust",
            Self::Masm => "masm",
        }
    }
}
33
34// SOURCE FILE
35// ================================================================================================
36
/// A [SourceFile] represents a single file stored in a [super::SourceManager]
///
/// It pairs the [SourceId] of the file with its [SourceContent].
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct SourceFile {
    /// The unique identifier allocated for this [SourceFile] by its owning [super::SourceManager]
    id: SourceId,
    /// The file content
    // The line-start table of `SourceContent` is skipped by serde, so deserialization goes through
    // a custom function which recomputes it from the deserialized text.
    #[cfg_attr(
        feature = "serde",
        serde(deserialize_with = "SourceContent::deserialize_and_recompute_line_starts")
    )]
    content: SourceContent,
}
50
51impl miette::SourceCode for SourceFile {
52    fn read_span<'a>(
53        &'a self,
54        span: &miette::SourceSpan,
55        context_lines_before: usize,
56        context_lines_after: usize,
57    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
58        let mut start =
59            u32::try_from(span.offset()).map_err(|_| miette::MietteError::OutOfBounds)?;
60        let len = u32::try_from(span.len()).map_err(|_| miette::MietteError::OutOfBounds)?;
61        let mut end = start.checked_add(len).ok_or(miette::MietteError::OutOfBounds)?;
62        if context_lines_before > 0 {
63            let line_index = self.content.line_index(start.into());
64            let start_line_index = line_index.saturating_sub(context_lines_before as u32);
65            start = self.content.line_start(start_line_index).map(|idx| idx.to_u32()).unwrap_or(0);
66        }
67        if context_lines_after > 0 {
68            let line_index = self.content.line_index(end.into());
69            let end_line_index = line_index
70                .checked_add(context_lines_after as u32)
71                .ok_or(miette::MietteError::OutOfBounds)?;
72            end = self
73                .content
74                .line_range(end_line_index)
75                .map(|range| range.end.to_u32())
76                .unwrap_or_else(|| self.content.source_range().end.to_u32());
77        }
78        Ok(Box::new(ScopedSourceFileRef {
79            file: self,
80            span: miette::SourceSpan::new((start as usize).into(), end.abs_diff(start) as usize),
81        }))
82    }
83}
84
85impl SourceFile {
86    /// Create a new [SourceFile] from its raw components
87    pub fn new(id: SourceId, lang: SourceLanguage, uri: Uri, content: impl Into<Box<str>>) -> Self {
88        let content = SourceContent::new(lang, uri, content.into());
89        Self { id, content }
90    }
91
92    /// This function is intended for use by [super::SourceManager] implementations that need to
93    /// construct a [SourceFile] from its raw components (i.e. the identifier for the source file
94    /// and its content).
95    ///
96    /// Since the only entity that should be constructing a [SourceId] is a [super::SourceManager],
97    /// it is only valid to call this function in one of two scenarios:
98    ///
99    /// 1. You are a [super::SourceManager] constructing a [SourceFile] after allocating a
100    ///    [SourceId]
101    /// 2. You pass [`SourceId::default()`], i.e. [`SourceId::UNKNOWN`] for the source identifier.
102    ///    The resulting [SourceFile] will be valid and safe to use in a context where there isn't a
103    ///    [super::SourceManager] present. If there is a source manager in use, then constructing
104    ///    detached [SourceFile]s is _not_ recommended, because it will make it confusing to
105    ///    determine whether a given [SourceFile] reference is safe to use.
106    ///
107    /// You should rarely, if ever, fall in camp 2 - but it can be handy in some narrow cases
108    pub fn from_raw_parts(id: SourceId, content: SourceContent) -> Self {
109        Self { id, content }
110    }
111
112    /// Get the [SourceId] associated with this file
113    pub const fn id(&self) -> SourceId {
114        self.id
115    }
116
117    /// Get the name of this source file
118    pub fn uri(&self) -> &Uri {
119        self.content.uri()
120    }
121
122    /// Returns a reference to the underlying [SourceContent]
123    pub fn content(&self) -> &SourceContent {
124        &self.content
125    }
126
127    /// Returns a mutable reference to the underlying [SourceContent]
128    pub fn content_mut(&mut self) -> &mut SourceContent {
129        &mut self.content
130    }
131
132    /// Returns the number of lines in this file
133    pub fn line_count(&self) -> usize {
134        self.content.line_starts.len()
135    }
136
137    /// Returns the number of bytes in this file
138    pub fn len(&self) -> usize {
139        self.content.len()
140    }
141
142    /// Returns true if this file is empty
143    pub fn is_empty(&self) -> bool {
144        self.content.is_empty()
145    }
146
147    /// Get the underlying content of this file
148    #[inline(always)]
149    pub fn as_str(&self) -> &str {
150        self.content.as_str()
151    }
152
153    /// Get the underlying content of this file as a byte slice
154    #[inline(always)]
155    pub fn as_bytes(&self) -> &[u8] {
156        self.content.as_bytes()
157    }
158
159    /// Returns a [SourceSpan] covering the entirety of this file
160    #[inline]
161    pub fn source_span(&self) -> SourceSpan {
162        let range = self.content.source_range();
163        SourceSpan::new(self.id, range.start.0..range.end.0)
164    }
165
166    /// Returns a subset of the underlying content as a string slice.
167    ///
168    /// The bounds of the given span are byte indices, _not_ character indices.
169    ///
170    /// Returns `None` if the given span is out of bounds, or if the bounds do not
171    /// fall on valid UTF-8 character boundaries.
172    #[inline(always)]
173    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
174        self.content.source_slice(span)
175    }
176
177    /// Returns a [SourceFileRef] corresponding to the bytes contained in the specified span.
178    pub fn slice(self: &Arc<Self>, span: impl Into<Range<u32>>) -> SourceFileRef {
179        SourceFileRef::new(Arc::clone(self), span)
180    }
181
182    /// Get a [SourceSpan] which points to the first byte of the character at `column` on `line`
183    ///
184    /// Returns `None` if the given line/column is out of bounds for this file.
185    pub fn line_column_to_span(
186        &self,
187        line: LineNumber,
188        column: ColumnNumber,
189    ) -> Option<SourceSpan> {
190        let offset = self.content.line_column_to_offset(line.into(), column.into())?;
191        Some(SourceSpan::at(self.id, offset.0))
192    }
193
194    /// Get a [FileLineCol] equivalent to the start of the given [SourceSpan]
195    pub fn location(&self, span: SourceSpan) -> FileLineCol {
196        assert_eq!(span.source_id(), self.id, "mismatched source ids");
197
198        self.content
199            .location(ByteIndex(span.into_range().start))
200            .expect("invalid source span: starting byte is out of bounds")
201    }
202}
203
204impl AsRef<str> for SourceFile {
205    #[inline(always)]
206    fn as_ref(&self) -> &str {
207        self.as_str()
208    }
209}
210
211impl AsRef<[u8]> for SourceFile {
212    #[inline(always)]
213    fn as_ref(&self) -> &[u8] {
214        self.as_bytes()
215    }
216}
217
218// SOURCE FILE REF
219// ================================================================================================
220
/// A reference to a specific spanned region of a [SourceFile], that provides access to the actual
/// [SourceFile], but scoped to the span it was created with.
///
/// This is useful in error types that implement [miette::Diagnostic], as it contains all of the
/// data necessary to render the source code being referenced, without a [super::SourceManager] on
/// hand.
///
/// Equality, ordering and hashing of this type are all based on the selected text only.
#[derive(Debug, Clone)]
pub struct SourceFileRef {
    /// A shared handle to the file being referenced
    file: Arc<SourceFile>,
    /// The region of `file` selected by this reference
    span: SourceSpan,
}
232
233impl SourceFileRef {
234    /// Create a [SourceFileRef] from a [SourceFile] and desired span (in bytes)
235    ///
236    /// The given span will be constrained to the bytes of `file`, so a span that reaches out of
237    /// bounds will have its end bound set to the last byte of the file.
238    pub fn new(file: Arc<SourceFile>, span: impl Into<Range<u32>>) -> Self {
239        let span = span.into();
240        let end = core::cmp::min(span.end, file.len() as u32);
241        let span = SourceSpan::new(file.id(), span.start..end);
242        Self { file, span }
243    }
244
245    /// Returns a ref-counted handle to the underlying [SourceFile]
246    pub fn source_file(&self) -> Arc<SourceFile> {
247        self.file.clone()
248    }
249
250    /// Returns the URI of the file this [SourceFileRef] is selecting
251    pub fn uri(&self) -> &Uri {
252        self.file.uri()
253    }
254
255    /// Returns the [SourceSpan] selected by this [SourceFileRef]
256    pub const fn span(&self) -> SourceSpan {
257        self.span
258    }
259
260    /// Returns the underlying `str` selected by this [SourceFileRef]
261    pub fn as_str(&self) -> &str {
262        self.file.source_slice(self.span).unwrap()
263    }
264
265    /// Returns the underlying bytes selected by this [SourceFileRef]
266    #[inline]
267    pub fn as_bytes(&self) -> &[u8] {
268        self.as_str().as_bytes()
269    }
270
271    /// Returns the number of bytes represented by the subset of the underlying file that is covered
272    /// by this [SourceFileRef]
273    pub fn len(&self) -> usize {
274        self.span.len()
275    }
276
277    /// Returns true if this selection is empty
278    pub fn is_empty(&self) -> bool {
279        self.len() == 0
280    }
281}
282
283impl Eq for SourceFileRef {}
284
285impl PartialEq for SourceFileRef {
286    fn eq(&self, other: &Self) -> bool {
287        self.as_str() == other.as_str()
288    }
289}
290
291impl Ord for SourceFileRef {
292    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
293        self.as_str().cmp(other.as_str())
294    }
295}
296
297impl PartialOrd for SourceFileRef {
298    #[inline(always)]
299    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
300        Some(self.cmp(other))
301    }
302}
303
304impl core::hash::Hash for SourceFileRef {
305    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
306        self.as_str().hash(state);
307    }
308}
309
310impl AsRef<str> for SourceFileRef {
311    #[inline(always)]
312    fn as_ref(&self) -> &str {
313        self.as_str()
314    }
315}
316
317impl AsRef<[u8]> for SourceFileRef {
318    #[inline(always)]
319    fn as_ref(&self) -> &[u8] {
320        self.as_bytes()
321    }
322}
323
324impl From<&SourceFileRef> for miette::SourceSpan {
325    fn from(source: &SourceFileRef) -> Self {
326        source.span.into()
327    }
328}
329
/// Used to implement [miette::SpanContents] for [SourceFile] and [SourceFileRef]
///
/// Unlike [SourceFileRef], this type borrows the [SourceFile] rather than sharing ownership of it.
struct ScopedSourceFileRef<'a> {
    /// The file whose content is being referenced
    file: &'a SourceFile,
    /// The region of `file` that was selected, as a byte offset and length
    span: miette::SourceSpan,
}
335
336impl<'a> miette::SpanContents<'a> for ScopedSourceFileRef<'a> {
337    #[inline]
338    fn data(&self) -> &'a [u8] {
339        let start = self.span.offset();
340        let end = start + self.span.len();
341        &self.file.as_bytes()[start..end]
342    }
343
344    #[inline]
345    fn span(&self) -> &miette::SourceSpan {
346        &self.span
347    }
348
349    fn line(&self) -> usize {
350        let offset = self.span.offset() as u32;
351        self.file.content.line_index(offset.into()).to_usize()
352    }
353
354    fn column(&self) -> usize {
355        let start = self.span.offset() as u32;
356        let end = start + self.span.len() as u32;
357        let span = SourceSpan::new(self.file.id(), start..end);
358        let loc = self.file.location(span);
359        loc.column.to_index().to_usize()
360    }
361
362    #[inline]
363    fn line_count(&self) -> usize {
364        self.file.line_count()
365    }
366
367    #[inline]
368    fn name(&self) -> Option<&str> {
369        Some(self.file.uri().as_ref())
370    }
371
372    #[inline]
373    fn language(&self) -> Option<&str> {
374        None
375    }
376}
377
378impl miette::SourceCode for SourceFileRef {
379    #[inline]
380    fn read_span<'a>(
381        &'a self,
382        span: &miette::SourceSpan,
383        context_lines_before: usize,
384        context_lines_after: usize,
385    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
386        self.file.read_span(span, context_lines_before, context_lines_after)
387    }
388}
389
390// SOURCE CONTENT
391// ================================================================================================
392
/// Represents key information about a source file and its content:
///
/// * The language identifier of the file
/// * The path to the file (or its name, in the case of virtual files)
/// * The content of the file
/// * The byte offsets of every line in the file, for use in looking up line/column information
/// * The version of the document
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct SourceContent {
    /// The language identifier for this source file
    language: Box<str>,
    /// The path (or name) of this file
    uri: Uri,
    /// The underlying content of this file
    content: String,
    /// The byte offsets for each line in this file
    // Never serialized; a value deserialized with the derived implementation starts out with an
    // empty table here.
    #[cfg_attr(feature = "serde", serde(default, skip))]
    line_starts: Vec<ByteIndex>,
    /// The document version
    #[cfg_attr(feature = "serde", serde(default))]
    version: i32,
}
414
415impl fmt::Debug for SourceContent {
416    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
417        let Self {
418            language,
419            uri,
420            content,
421            line_starts,
422            version,
423        } = self;
424        f.debug_struct("SourceContent")
425            .field("version", version)
426            .field("language", language)
427            .field("uri", uri)
428            .field("size_in_bytes", &content.len())
429            .field("line_count", &line_starts.len())
430            .field("content", content)
431            .finish()
432    }
433}
434
435impl Eq for SourceContent {}
436
437impl PartialEq for SourceContent {
438    #[inline]
439    fn eq(&self, other: &Self) -> bool {
440        self.language == other.language && self.uri == other.uri && self.content == other.content
441    }
442}
443
444impl Ord for SourceContent {
445    #[inline]
446    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
447        self.uri.cmp(&other.uri).then_with(|| self.content.cmp(&other.content))
448    }
449}
450
451impl PartialOrd for SourceContent {
452    #[inline]
453    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
454        Some(self.cmp(other))
455    }
456}
457
458impl core::hash::Hash for SourceContent {
459    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
460        self.language.hash(state);
461        self.uri.hash(state);
462        self.content.hash(state);
463    }
464}
465
/// The error returned by [SourceContent::update] when the selection to be replaced cannot be
/// resolved against the current content.
#[derive(Debug, thiserror::Error)]
pub enum SourceContentUpdateError {
    /// The start position of the selection could not be resolved to a byte offset
    #[error("invalid content selection: start position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionStart(Position),
    /// The end position of the selection could not be resolved to a byte offset
    #[error("invalid content selection: end position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionEnd(Position),
}
473
474impl SourceContent {
475    /// Create a new [SourceContent] from the (possibly virtual) file path, and its content as a
476    /// UTF-8 string.
477    ///
478    /// When created, the line starts for this file will be computed, which requires scanning the
479    /// file content once.
480    pub fn new(language: impl AsRef<str>, uri: impl Into<Uri>, content: impl Into<String>) -> Self {
481        let language = language.as_ref().to_string().into_boxed_str();
482        let content: String = content.into();
483        let bytes = content.as_bytes();
484
485        assert!(
486            bytes.len() < u32::MAX as usize,
487            "unsupported source file: current maximum supported length in bytes is 2^32"
488        );
489
490        let line_starts = compute_line_starts(&content, None);
491
492        Self {
493            language,
494            uri: uri.into(),
495            content,
496            line_starts,
497            version: 0,
498        }
499    }
500
501    /// Get the language identifier of this source file
502    pub fn language(&self) -> &str {
503        &self.language
504    }
505
506    /// Get the current version of this source file's content
507    pub fn version(&self) -> i32 {
508        self.version
509    }
510
511    /// Set the current version of this content
512    #[inline(always)]
513    pub fn set_version(&mut self, version: i32) {
514        self.version = version;
515    }
516
517    /// Get the URI of this source file
518    #[inline]
519    pub fn uri(&self) -> &Uri {
520        &self.uri
521    }
522
523    /// Returns the underlying content as a string slice
524    #[inline(always)]
525    pub fn as_str(&self) -> &str {
526        self.content.as_ref()
527    }
528
529    /// Returns the underlying content as a byte slice
530    #[inline(always)]
531    pub fn as_bytes(&self) -> &[u8] {
532        self.content.as_bytes()
533    }
534
535    /// Returns the size in bytes of the underlying content
536    #[inline(always)]
537    pub fn len(&self) -> usize {
538        self.content.len()
539    }
540
541    /// Returns true if the underlying content is empty
542    #[inline(always)]
543    pub fn is_empty(&self) -> bool {
544        self.content.is_empty()
545    }
546
547    /// Returns the range of valid byte indices for this file
548    #[inline]
549    pub fn source_range(&self) -> Range<ByteIndex> {
550        ByteIndex(0)..ByteIndex(self.content.len() as u32)
551    }
552
553    /// Returns a subset of the underlying content as a string slice.
554    ///
555    /// The bounds of the given span are byte indices, _not_ character indices.
556    ///
557    /// Returns `None` if the given span is out of bounds, or if the bounds do not
558    /// fall on valid UTF-8 character boundaries.
559    #[inline(always)]
560    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
561        self.as_str().get(span.into())
562    }
563
564    /// Returns a subset of the underlying content as a byte slice.
565    ///
566    /// Returns `None` if the given span is out of bounds
567    #[inline(always)]
568    pub fn byte_slice(&self, span: impl Into<Range<ByteIndex>>) -> Option<&[u8]> {
569        let Range { start, end } = span.into();
570        self.as_bytes().get(start.to_usize()..end.to_usize())
571    }
572
573    /// Like [Self::source_slice], but the slice is computed like a selection in an editor, i.e.
574    /// based on line/column positions, rather than raw character indices.
575    ///
576    /// This is useful when mapping LSP operations to content in the source file.
577    pub fn select(&self, mut range: Selection) -> Option<&str> {
578        range.canonicalize();
579
580        let start = self.line_column_to_offset(range.start.line, range.start.character)?;
581        let end = self.line_column_to_offset(range.end.line, range.end.character)?;
582
583        Some(&self.as_str()[start.to_usize()..end.to_usize()])
584    }
585
586    /// Returns the number of lines in the source content
587    pub fn line_count(&self) -> usize {
588        self.line_starts.len()
589    }
590
591    /// Returns the byte index at which the line corresponding to `line_index` starts
592    ///
593    /// Returns `None` if the given index is out of bounds
594    pub fn line_start(&self, line_index: LineIndex) -> Option<ByteIndex> {
595        self.line_starts.get(line_index.to_usize()).copied()
596    }
597
598    /// Returns the index of the last line in this file
599    pub fn last_line_index(&self) -> LineIndex {
600        LineIndex(self.line_count().saturating_sub(1).try_into().expect("too many lines in file"))
601    }
602
603    /// Get the range of byte indices covered by the given line
604    pub fn line_range(&self, line_index: LineIndex) -> Option<Range<ByteIndex>> {
605        let line_start = self.line_start(line_index)?;
606        match self.line_start(line_index + 1) {
607            Some(line_end) => Some(line_start..line_end),
608            None => Some(line_start..ByteIndex(self.content.len() as u32)),
609        }
610    }
611
612    /// Get the index of the line to which `byte_index` belongs
613    pub fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
614        match self.line_starts.binary_search(&byte_index) {
615            Ok(line) => LineIndex(line as u32),
616            Err(next_line) => LineIndex(next_line as u32 - 1),
617        }
618    }
619
620    /// Get the [ByteIndex] corresponding to the given line and column indices.
621    ///
622    /// Returns `None` if the line or column indices are out of bounds.
623    pub fn line_column_to_offset(
624        &self,
625        line_index: LineIndex,
626        column_index: ColumnIndex,
627    ) -> Option<ByteIndex> {
628        let column_index = column_index.to_usize();
629        let line_span = self.line_range(line_index)?;
630        let line_src = self
631            .content
632            .get(line_span.start.to_usize()..line_span.end.to_usize())
633            .expect("invalid line boundaries: invalid utf-8");
634        if line_src.len() < column_index {
635            return None;
636        }
637        let (pre, _) = line_src.split_at(column_index);
638        let start = line_span.start;
639        Some(start + ByteOffset::from_str_len(pre))
640    }
641
642    /// Get a [FileLineCol] corresponding to the line/column in this file at which `byte_index`
643    /// occurs
644    pub fn location(&self, byte_index: ByteIndex) -> Option<FileLineCol> {
645        let line_index = self.line_index(byte_index);
646        let line_start_index = self.line_start(line_index)?;
647        let line_src = self.content.get(line_start_index.to_usize()..byte_index.to_usize())?;
648        let column_index = ColumnIndex::from(line_src.chars().count() as u32);
649        Some(FileLineCol {
650            uri: self.uri.clone(),
651            line: line_index.number(),
652            column: column_index.number(),
653        })
654    }
655
    /// Update the source document after being notified of a change event.
    ///
    /// When `range` is `Some`, only the selected region of the content is replaced by `text`, and
    /// the line start table is patched in place. When `range` is `None`, `text` replaces the
    /// entire content, and the line start table is recomputed from scratch.
    ///
    /// The `version` indicates the new version of the document
    ///
    /// NOTE: This is intended to update a [super::SourceManager]'s view of the content of the
    /// document, _not_ to perform an update against the actual file, wherever it may be.
    ///
    /// # Errors
    ///
    /// Returns [SourceContentUpdateError::InvalidSelectionStart] or
    /// [SourceContentUpdateError::InvalidSelectionEnd] if the corresponding position of `range`
    /// cannot be resolved to a byte offset in the current content. In that case neither the
    /// content nor the version is modified.
    ///
    /// # Panics
    ///
    /// Panics if the start of `range` resolves to a byte offset greater than that of its end.
    // NOTE(review): unlike `select`, `range` is not canonicalized here - confirm that callers
    // always pass an ordered selection.
    pub fn update(
        &mut self,
        text: String,
        range: Option<Selection>,
        version: i32,
    ) -> Result<(), SourceContentUpdateError> {
        match range {
            Some(range) => {
                // Resolve both ends of the selection to byte offsets in the current content
                let start = self
                    .line_column_to_offset(range.start.line, range.start.character)
                    .ok_or(SourceContentUpdateError::InvalidSelectionStart(range.start))?
                    .to_usize();
                let end = self
                    .line_column_to_offset(range.end.line, range.end.character)
                    .ok_or(SourceContentUpdateError::InvalidSelectionEnd(range.end))?
                    .to_usize();
                assert!(start <= end, "start of range must be less than end, got {start}..{end}",);
                self.content.replace_range(start..end, &text);

                // The line starts introduced by `text`, expressed as offsets into the whole
                // content (i.e. relative to `start`, where `text` was inserted)
                let added_line_starts = compute_line_starts(&text, Some(start as u32));
                let num_added = added_line_starts.len();
                // The entry for the line on which the selection starts is kept as-is, so
                // patching of the table begins with the entry after it
                let splice_start = range.start.line.to_usize() + 1;
                // Determine deletion range in line_starts to respect Selection semantics.
                // For multi-line edits, remove line starts from (start.line + 1) up to end.line
                // inclusive, since all intervening newlines are removed by the
                // replacement, regardless of end.character.
                enum Deletion {
                    Empty,
                    Inclusive(usize), // inclusive end index
                }
                let deletion = if range.start.line == range.end.line {
                    // A selection within a single line removes no line starts
                    Deletion::Empty
                } else {
                    // Clamp the last removed entry to the last entry of the table
                    let mut end_line_for_splice = range.end.line.to_usize();
                    if !self.line_starts.is_empty() {
                        let max_idx = self.line_starts.len() - 1;
                        if end_line_for_splice > max_idx {
                            end_line_for_splice = max_idx;
                        }
                    }
                    if end_line_for_splice >= splice_start {
                        Deletion::Inclusive(end_line_for_splice)
                    } else {
                        Deletion::Empty
                    }
                };

                // Replace the removed line starts (if any) with those introduced by `text`
                match deletion {
                    Deletion::Empty => {
                        self.line_starts.splice(splice_start..splice_start, added_line_starts);
                    },
                    Deletion::Inclusive(end_idx) => {
                        self.line_starts.splice(splice_start..=end_idx, added_line_starts);
                    },
                }

                // The net change in the length of the content caused by the replacement
                let diff =
                    (text.len() as i32).saturating_sub_unsigned((end as u32) - (start as u32));
                if diff != 0 {
                    // Shift every line start which follows the newly inserted ones
                    for i in (splice_start + num_added)..self.line_starts.len() {
                        self.line_starts[i] =
                            ByteIndex(self.line_starts[i].to_u32().saturating_add_signed(diff));
                    }
                }
            },
            None => {
                // Whole-document replacement
                self.line_starts = compute_line_starts(&text, None);
                self.content = text;
            },
        }

        self.version = version;

        Ok(())
    }
737}
738
#[cfg(feature = "serde")]
impl SourceContent {
    /// Deserializes a [SourceContent], and then rebuilds its line start table from the
    /// deserialized text, since that table is not part of the serialized form.
    fn deserialize_and_recompute_line_starts<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <SourceContent as Deserialize<'de>>::deserialize(deserializer).map(|mut decoded| {
            decoded.line_starts = compute_line_starts(&decoded.content, None);
            decoded
        })
    }
}
750
751fn compute_line_starts(text: &str, text_offset: Option<u32>) -> Vec<ByteIndex> {
752    let bytes = text.as_bytes();
753    let initial_line_offset = match text_offset {
754        Some(_) => None,
755        None => Some(ByteIndex(0)),
756    };
757    let text_offset = text_offset.unwrap_or(0);
758    initial_line_offset
759        .into_iter()
760        .chain(memchr::memchr_iter(b'\n', bytes).filter_map(|mut offset| {
761            // Determine if the newline has any preceding escapes
762            let mut preceding_escapes = 0;
763            let line_start = offset + 1;
764            while let Some(prev_offset) = offset.checked_sub(1) {
765                if bytes[prev_offset] == b'\\' {
766                    offset = prev_offset;
767                    preceding_escapes += 1;
768                    continue;
769                }
770                break;
771            }
772
773            // If the newline is escaped, do not count it as a new line
774            let is_escaped = preceding_escapes > 0 && preceding_escapes % 2 != 0;
775            if is_escaped {
776                None
777            } else {
778                Some(ByteIndex(text_offset + line_start as u32))
779            }
780        }))
781        .collect()
782}
783
784// SOURCE CONTENT INDICES
785// ================================================================================================
786
/// A position in a source file, measured in bytes from the beginning of the file
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
pub struct ByteIndex(pub u32);

impl ByteIndex {
    /// Wraps a raw `u32` index in a [ByteIndex]
    pub const fn new(index: u32) -> Self {
        ByteIndex(index)
    }

    /// Returns the raw index, widened to a usize
    #[inline(always)]
    pub const fn to_usize(self) -> usize {
        let ByteIndex(raw) = self;
        raw as usize
    }

    /// Returns the raw index
    #[inline(always)]
    pub const fn to_u32(self) -> u32 {
        let ByteIndex(raw) = self;
        raw
    }
}
811
812impl core::ops::Add<ByteOffset> for ByteIndex {
813    type Output = ByteIndex;
814
815    fn add(self, rhs: ByteOffset) -> Self {
816        Self((self.0 as i64 + rhs.0) as u32)
817    }
818}
819
820impl core::ops::Add<u32> for ByteIndex {
821    type Output = ByteIndex;
822
823    fn add(self, rhs: u32) -> Self {
824        Self(self.0 + rhs)
825    }
826}
827
828impl core::ops::AddAssign<ByteOffset> for ByteIndex {
829    fn add_assign(&mut self, rhs: ByteOffset) {
830        *self = *self + rhs;
831    }
832}
833
834impl core::ops::AddAssign<u32> for ByteIndex {
835    fn add_assign(&mut self, rhs: u32) {
836        self.0 += rhs;
837    }
838}
839
840impl core::ops::Sub<ByteOffset> for ByteIndex {
841    type Output = ByteIndex;
842
843    fn sub(self, rhs: ByteOffset) -> Self {
844        Self((self.0 as i64 - rhs.0) as u32)
845    }
846}
847
848impl core::ops::Sub<u32> for ByteIndex {
849    type Output = ByteIndex;
850
851    fn sub(self, rhs: u32) -> Self {
852        Self(self.0 - rhs)
853    }
854}
855
856impl core::ops::SubAssign<ByteOffset> for ByteIndex {
857    fn sub_assign(&mut self, rhs: ByteOffset) {
858        *self = *self - rhs;
859    }
860}
861
862impl core::ops::SubAssign<u32> for ByteIndex {
863    fn sub_assign(&mut self, rhs: u32) {
864        self.0 -= rhs;
865    }
866}
867
868impl From<u32> for ByteIndex {
869    fn from(index: u32) -> Self {
870        Self(index)
871    }
872}
873
874impl From<ByteIndex> for u32 {
875    fn from(index: ByteIndex) -> Self {
876        index.0
877    }
878}
879
880impl fmt::Display for ByteIndex {
881    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
882        fmt::Display::fmt(&self.0, f)
883    }
884}
885
/// A signed distance in bytes, relative to some [ByteIndex]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(i64);

impl ByteOffset {
    /// Returns the number of bytes the UTF-8 encoding of `c` occupies, as an offset
    pub fn from_char_len(c: char) -> ByteOffset {
        ByteOffset(c.len_utf8() as i64)
    }

    /// Returns the length in bytes of `s`, as an offset
    pub fn from_str_len(s: &str) -> ByteOffset {
        ByteOffset(s.len() as i64)
    }
}

impl core::ops::Add for ByteOffset {
    type Output = ByteOffset;

    fn add(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs + rhs)
    }
}

impl core::ops::AddAssign for ByteOffset {
    fn add_assign(&mut self, rhs: Self) {
        *self = core::ops::Add::add(*self, rhs);
    }
}

impl core::ops::Sub for ByteOffset {
    type Output = ByteOffset;

    fn sub(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs - rhs)
    }
}

impl core::ops::SubAssign for ByteOffset {
    fn sub_assign(&mut self, rhs: Self) {
        *self = core::ops::Sub::sub(*self, rhs);
    }
}

impl fmt::Display for ByteOffset {
    /// Formats the raw offset, exactly as the underlying `i64` would be formatted
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <i64 as fmt::Display>::fmt(&self.0, f)
    }
}
935
/// Declares a pair of newtypes for positions within a source file: a zero-indexed
/// "index" type wrapping a `u32`, and a one-indexed "number" type wrapping a `NonZeroU32`,
/// together with conversions between the two, checked/saturating arithmetic, and `Display`.
macro_rules! declare_dual_number_and_index_type {
    // Shorthand form: derive the `<Name>Index` and `<Name>Number` type names from one identifier.
    ($name:ident, $description:literal) => {
        paste::paste! {
            declare_dual_number_and_index_type!([<$name Index>], [<$name Number>], $description);
        }
    };

    // Full form: both type names are given explicitly.
    ($index_name:ident, $number_name:ident, $description:literal) => {
        #[doc = concat!("A zero-indexed ", $description, " number")]
        #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $index_name(pub u32);

        impl $index_name {
            #[doc = concat!("Convert to a [", stringify!($number_name), "]")]
            ///
            /// # Panics
            ///
            /// Panics if this index is `u32::MAX`, because the corresponding one-indexed number
            /// does not fit in a `u32`.
            pub const fn number(self) -> $number_name {
                // `wrapping_add(1)` is zero only when `self.0 == u32::MAX`, so `None` here is
                // exactly the overflow case, and no `unsafe` is needed to build the value.
                match NonZeroU32::new(self.0.wrapping_add(1)) {
                    Some(number) => $number_name(number),
                    None => panic!("invalid index: overflow occurred"),
                }
            }

            /// Get the raw index value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0 as usize
            }

            /// Get the raw index value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0
            }

            /// Add `offset` to this index, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this index, returning `None` on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.checked_add_signed(offset).map(Self)
            }

            /// Subtract `offset` from this index, returning `None` on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.checked_sub(offset).map(Self)
            }

            /// Add `offset` to this index, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this index, saturating to `0` on underflow, and `u32::MAX`
            /// on overflow.
            pub const fn saturating_add_signed(self, offset: i32) -> Self {
                Self(self.0.saturating_add_signed(offset))
            }

            /// Subtract `offset` from this index, saturating to `0` on underflow
            pub const fn saturating_sub(self, offset: u32) -> Self {
                Self(self.0.saturating_sub(offset))
            }
        }

        impl From<u32> for $index_name {
            #[inline]
            fn from(index: u32) -> Self {
                Self(index)
            }
        }

        impl From<$number_name> for $index_name {
            #[inline]
            fn from(index: $number_name) -> Self {
                // The number is non-zero by construction, so this subtraction cannot underflow.
                Self(index.to_u32() - 1)
            }
        }

        impl core::ops::Add<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn add(self, rhs: u32) -> Self {
                Self(self.0 + rhs)
            }
        }

        impl core::ops::AddAssign<u32> for $index_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $index_name {
            type Output = Self;

            /// Panics if the result does not fit in a `u32`.
            fn add(self, rhs: i32) -> Self {
                self.checked_add_signed(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $index_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: u32) -> Self {
                Self(self.0 - rhs)
            }
        }

        impl core::ops::SubAssign<u32> for $index_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $index_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }

        #[doc = concat!("A one-indexed ", $description, " number")]
        #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $number_name(NonZeroU32);

        impl Default for $number_name {
            /// The default number is `1`, i.e. the first position.
            fn default() -> Self {
                // SAFETY: the literal `1` is non-zero.
                Self(unsafe { NonZeroU32::new_unchecked(1) })
            }
        }

        impl $number_name {
            /// Create a new number from a raw `u32`, returning `None` if `number` is zero
            pub const fn new(number: u32) -> Option<Self> {
                match NonZeroU32::new(number) {
                    Some(num) => Some(Self(num)),
                    None => None,
                }
            }

            #[doc = concat!("Convert to a [", stringify!($index_name), "]")]
            pub const fn to_index(self) -> $index_name {
                $index_name(self.to_u32().saturating_sub(1))
            }

            /// Get the raw value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0.get() as usize
            }

            /// Get the raw value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0.get()
            }

            /// Add `offset` to this number, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this number, returning `None` if the result would be
            /// zero, or on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.get().checked_add_signed(offset).and_then(Self::new)
            }

            /// Subtract `offset` from this number, returning `None` if the result would be zero,
            /// or on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.get().checked_sub(offset).and_then(Self::new)
            }

            /// Add `offset` to this number, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                // Saturating addition on a non-zero value can never produce zero, and
                // `NonZeroU32::saturating_add` encodes that without requiring `unsafe`.
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this number, saturating to `1` on underflow, and
            /// `u32::MAX` on overflow.
            pub fn saturating_add_signed(self, offset: i32) -> Self {
                Self::new(self.to_u32().saturating_add_signed(offset)).unwrap_or_default()
            }

            /// Subtract `offset` from this number, saturating to `1` on underflow
            pub fn saturating_sub(self, offset: u32) -> Self {
                Self::new(self.to_u32().saturating_sub(offset)).unwrap_or_default()
            }
        }

        impl From<NonZeroU32> for $number_name {
            #[inline]
            fn from(index: NonZeroU32) -> Self {
                Self(index)
            }
        }

        impl From<$index_name> for $number_name {
            /// Panics if `index` is `u32::MAX`, see the `number` method of the index type.
            #[inline]
            fn from(index: $index_name) -> Self {
                index.number()
            }
        }

        impl core::ops::Add<u32> for $number_name {
            type Output = Self;

            /// Panics if the result does not fit in a `u32`.
            #[inline]
            fn add(self, rhs: u32) -> Self {
                // A checked addition is required here: an unchecked wrapping sum could be zero,
                // which is not a valid value for the underlying `NonZeroU32`.
                self.checked_add(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<u32> for $number_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $number_name {
            type Output = Self;

            /// Panics if the result is zero or does not fit in a `u32`.
            fn add(self, rhs: i32) -> Self {
                self.to_u32()
                    .checked_add_signed(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $number_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $number_name {
            type Output = Self;

            /// Panics if the result would be zero or negative.
            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.to_u32()
                    .checked_sub(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $number_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $number_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
1210
// Generate the zero-indexed `LineIndex`/`ColumnIndex` types and their one-indexed
// `LineNumber`/`ColumnNumber` counterparts.
declare_dual_number_and_index_type!(Line, "line");
declare_dual_number_and_index_type!(Column, "column");
1213
#[cfg(test)]
mod tests {
    use super::*;

    /// Line lookups on freshly-created content must return each line's bytes, including its
    /// trailing newline, and an empty final line after the last newline.
    #[test]
    fn source_content_line_starts() {
        const CONTENT: &str = "\
begin
  push.1
  push.2
  add
end
";
        let content = SourceContent::new("masm", "foo.masm", CONTENT);

        assert_eq!(content.line_count(), 6);

        let expected_lines = [
            (LineIndex(0), "begin\n"),
            (LineIndex(1), "  push.1\n"),
            (content.last_line_index(), ""),
        ];
        for (line, expected) in expected_lines {
            let range = content.line_range(line).expect("invalid line");
            let bytes = content.byte_slice(range).expect("invalid byte range");
            assert_eq!(bytes, expected.as_bytes());
        }
    }

    /// Line lookups must stay consistent after an in-place update that replaces one line with
    /// a multi-line fragment.
    #[test]
    fn source_content_line_starts_after_update() {
        const CONTENT: &str = "\
begin
  push.1
  push.2
  add
end
";
        const FRAGMENT: &str = "  push.2
  mul
end
";
        const UPDATED: &str = "\
begin
  push.1
  push.2
  add
  push.2
  mul
end
";
        let mut content = SourceContent::new("masm", "foo.masm", CONTENT);
        let selection = Selection::from(LineIndex(4)..LineIndex(5));
        content
            .update(String::from(FRAGMENT), Some(selection), 1)
            .expect("update failed");

        assert_eq!(content.as_str(), UPDATED);
        assert_eq!(content.line_count(), 8);

        let expected_lines = [
            (LineIndex(0), "begin\n"),
            (LineIndex(3), "  add\n"),
            (LineIndex(4), "  push.2\n"),
            (content.last_line_index(), ""),
        ];
        for (line, expected) in expected_lines {
            let range = content.line_range(line).expect("invalid line");
            let bytes = content.byte_slice(range).expect("invalid byte range");
            assert_eq!(bytes, expected.as_bytes());
        }
    }
}