miden_debug_types/
source_file.rs

1use alloc::{
2    boxed::Box,
3    string::{String, ToString},
4    sync::Arc,
5    vec::Vec,
6};
7use core::{fmt, num::NonZeroU32, ops::Range};
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use super::{FileLineCol, Position, Selection, SourceId, SourceSpan, Uri};
13
14// SOURCE LANGUAGE
15// ================================================================================================
16
/// Identifies the language a source file is written in.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SourceLanguage {
    /// Sources identified as `masm`
    Masm,
    /// Sources identified as `rust`
    Rust,
    /// Any other language, identified by the given static name
    Other(&'static str),
}

impl AsRef<str> for SourceLanguage {
    /// Returns the textual identifier of this language: `"masm"`, `"rust"`, or the name carried
    /// by [`SourceLanguage::Other`].
    fn as_ref(&self) -> &str {
        match *self {
            Self::Other(name) => name,
            Self::Rust => "rust",
            Self::Masm => "masm",
        }
    }
}
33
34// SOURCE FILE
35// ================================================================================================
36
37/// A [SourceFile] represents a single file stored in a [super::SourceManager]
38#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
39#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
40pub struct SourceFile {
41    /// The unique identifier allocated for this [SourceFile] by its owning [super::SourceManager]
42    id: SourceId,
43    /// The file content
44    #[cfg_attr(
45        feature = "serde",
46        serde(deserialize_with = "SourceContent::deserialize_and_recompute_line_starts")
47    )]
48    content: SourceContent,
49}
50
51impl miette::SourceCode for SourceFile {
52    fn read_span<'a>(
53        &'a self,
54        span: &miette::SourceSpan,
55        context_lines_before: usize,
56        context_lines_after: usize,
57    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
58        let mut start =
59            u32::try_from(span.offset()).map_err(|_| miette::MietteError::OutOfBounds)?;
60        let len = u32::try_from(span.len()).map_err(|_| miette::MietteError::OutOfBounds)?;
61        let mut end = start.checked_add(len).ok_or(miette::MietteError::OutOfBounds)?;
62        if context_lines_before > 0 {
63            let line_index = self.content.line_index(start.into());
64            let start_line_index = line_index.saturating_sub(context_lines_before as u32);
65            start = self.content.line_start(start_line_index).map(|idx| idx.to_u32()).unwrap_or(0);
66        }
67        if context_lines_after > 0 {
68            let line_index = self.content.line_index(end.into());
69            let end_line_index = line_index
70                .checked_add(context_lines_after as u32)
71                .ok_or(miette::MietteError::OutOfBounds)?;
72            end = self
73                .content
74                .line_range(end_line_index)
75                .map(|range| range.end.to_u32())
76                .unwrap_or_else(|| self.content.source_range().end.to_u32());
77        }
78        Ok(Box::new(ScopedSourceFileRef {
79            file: self,
80            span: miette::SourceSpan::new((start as usize).into(), end.abs_diff(start) as usize),
81        }))
82    }
83}
84
85impl SourceFile {
86    /// Create a new [SourceFile] from its raw components
87    pub fn new(id: SourceId, lang: SourceLanguage, uri: Uri, content: impl Into<Box<str>>) -> Self {
88        let content = SourceContent::new(lang, uri, content.into());
89        Self { id, content }
90    }
91
92    /// This function is intended for use by [super::SourceManager] implementations that need to
93    /// construct a [SourceFile] from its raw components (i.e. the identifier for the source file
94    /// and its content).
95    ///
96    /// Since the only entity that should be constructing a [SourceId] is a [super::SourceManager],
97    /// it is only valid to call this function in one of two scenarios:
98    ///
99    /// 1. You are a [super::SourceManager] constructing a [SourceFile] after allocating a
100    ///    [SourceId]
101    /// 2. You pass [`SourceId::default()`], i.e. [`SourceId::UNKNOWN`] for the source identifier.
102    ///    The resulting [SourceFile] will be valid and safe to use in a context where there isn't a
103    ///    [super::SourceManager] present. If there is a source manager in use, then constructing
104    ///    detached [SourceFile]s is _not_ recommended, because it will make it confusing to
105    ///    determine whether a given [SourceFile] reference is safe to use.
106    ///
107    /// You should rarely, if ever, fall in camp 2 - but it can be handy in some narrow cases
108    pub fn from_raw_parts(id: SourceId, content: SourceContent) -> Self {
109        Self { id, content }
110    }
111
112    /// Get the [SourceId] associated with this file
113    pub const fn id(&self) -> SourceId {
114        self.id
115    }
116
117    /// Get the name of this source file
118    pub fn uri(&self) -> &Uri {
119        self.content.uri()
120    }
121
122    /// Returns a reference to the underlying [SourceContent]
123    pub fn content(&self) -> &SourceContent {
124        &self.content
125    }
126
127    /// Returns a mutable reference to the underlying [SourceContent]
128    pub fn content_mut(&mut self) -> &mut SourceContent {
129        &mut self.content
130    }
131
132    /// Returns the number of lines in this file
133    pub fn line_count(&self) -> usize {
134        self.content.line_starts.len()
135    }
136
137    /// Returns the number of bytes in this file
138    pub fn len(&self) -> usize {
139        self.content.len()
140    }
141
142    /// Returns true if this file is empty
143    pub fn is_empty(&self) -> bool {
144        self.content.is_empty()
145    }
146
147    /// Get the underlying content of this file
148    #[inline(always)]
149    pub fn as_str(&self) -> &str {
150        self.content.as_str()
151    }
152
153    /// Get the underlying content of this file as a byte slice
154    #[inline(always)]
155    pub fn as_bytes(&self) -> &[u8] {
156        self.content.as_bytes()
157    }
158
159    /// Returns a [SourceSpan] covering the entirety of this file
160    #[inline]
161    pub fn source_span(&self) -> SourceSpan {
162        let range = self.content.source_range();
163        SourceSpan::new(self.id, range.start.0..range.end.0)
164    }
165
166    /// Returns a subset of the underlying content as a string slice.
167    ///
168    /// The bounds of the given span are byte indices, _not_ character indices.
169    ///
170    /// Returns `None` if the given span is out of bounds, or if the bounds do not
171    /// fall on valid UTF-8 character boundaries.
172    #[inline(always)]
173    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
174        self.content.source_slice(span)
175    }
176
177    /// Returns a [SourceFileRef] corresponding to the bytes contained in the specified span.
178    pub fn slice(self: &Arc<Self>, span: impl Into<Range<u32>>) -> SourceFileRef {
179        SourceFileRef::new(Arc::clone(self), span)
180    }
181
182    /// Get a [SourceSpan] which points to the first byte of the character at `column` on `line`
183    ///
184    /// Returns `None` if the given line/column is out of bounds for this file.
185    pub fn line_column_to_span(
186        &self,
187        line: LineNumber,
188        column: ColumnNumber,
189    ) -> Option<SourceSpan> {
190        let offset = self.content.line_column_to_offset(line.into(), column.into())?;
191        Some(SourceSpan::at(self.id, offset.0))
192    }
193
194    /// Get a [FileLineCol] equivalent to the start of the given [SourceSpan]
195    pub fn location(&self, span: SourceSpan) -> FileLineCol {
196        assert_eq!(span.source_id(), self.id, "mismatched source ids");
197
198        self.content
199            .location(ByteIndex(span.into_range().start))
200            .expect("invalid source span: starting byte is out of bounds")
201    }
202}
203
204impl AsRef<str> for SourceFile {
205    #[inline(always)]
206    fn as_ref(&self) -> &str {
207        self.as_str()
208    }
209}
210
211impl AsRef<[u8]> for SourceFile {
212    #[inline(always)]
213    fn as_ref(&self) -> &[u8] {
214        self.as_bytes()
215    }
216}
217
218// SOURCE FILE REF
219// ================================================================================================
220
221/// A reference to a specific spanned region of a [SourceFile], that provides access to the actual
222/// [SourceFile], but scoped to the span it was created with.
223///
224/// This is useful in error types that implement [miette::Diagnostic], as it contains all of the
225/// data necessary to render the source code being referenced, without a [super::SourceManager] on
226/// hand.
227#[derive(Debug, Clone)]
228pub struct SourceFileRef {
229    file: Arc<SourceFile>,
230    span: SourceSpan,
231}
232
233impl SourceFileRef {
234    /// Create a [SourceFileRef] from a [SourceFile] and desired span (in bytes)
235    ///
236    /// The given span will be constrained to the bytes of `file`, so a span that reaches out of
237    /// bounds will have its end bound set to the last byte of the file.
238    pub fn new(file: Arc<SourceFile>, span: impl Into<Range<u32>>) -> Self {
239        let span = span.into();
240        let end = core::cmp::min(span.end, file.len() as u32);
241        let span = SourceSpan::new(file.id(), span.start..end);
242        Self { file, span }
243    }
244
245    /// Returns a ref-counted handle to the underlying [SourceFile]
246    pub fn source_file(&self) -> Arc<SourceFile> {
247        self.file.clone()
248    }
249
250    /// Returns the URI of the file this [SourceFileRef] is selecting
251    pub fn uri(&self) -> &Uri {
252        self.file.uri()
253    }
254
255    /// Returns the [SourceSpan] selected by this [SourceFileRef]
256    pub const fn span(&self) -> SourceSpan {
257        self.span
258    }
259
260    /// Returns the underlying `str` selected by this [SourceFileRef]
261    pub fn as_str(&self) -> &str {
262        self.file.source_slice(self.span).unwrap()
263    }
264
265    /// Returns the underlying bytes selected by this [SourceFileRef]
266    #[inline]
267    pub fn as_bytes(&self) -> &[u8] {
268        self.as_str().as_bytes()
269    }
270
271    /// Returns the number of bytes represented by the subset of the underlying file that is covered
272    /// by this [SourceFileRef]
273    pub fn len(&self) -> usize {
274        self.span.len()
275    }
276
277    /// Returns true if this selection is empty
278    pub fn is_empty(&self) -> bool {
279        self.len() == 0
280    }
281}
282
283impl Eq for SourceFileRef {}
284
285impl PartialEq for SourceFileRef {
286    fn eq(&self, other: &Self) -> bool {
287        self.as_str() == other.as_str()
288    }
289}
290
291impl Ord for SourceFileRef {
292    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
293        self.as_str().cmp(other.as_str())
294    }
295}
296
297impl PartialOrd for SourceFileRef {
298    #[inline(always)]
299    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
300        Some(self.cmp(other))
301    }
302}
303
304impl core::hash::Hash for SourceFileRef {
305    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
306        self.span.hash(state);
307        self.as_str().hash(state);
308    }
309}
310
311impl AsRef<str> for SourceFileRef {
312    #[inline(always)]
313    fn as_ref(&self) -> &str {
314        self.as_str()
315    }
316}
317
318impl AsRef<[u8]> for SourceFileRef {
319    #[inline(always)]
320    fn as_ref(&self) -> &[u8] {
321        self.as_bytes()
322    }
323}
324
325impl From<&SourceFileRef> for miette::SourceSpan {
326    fn from(source: &SourceFileRef) -> Self {
327        source.span.into()
328    }
329}
330
331/// Used to implement [miette::SpanContents] for [SourceFile] and [SourceFileRef]
332struct ScopedSourceFileRef<'a> {
333    file: &'a SourceFile,
334    span: miette::SourceSpan,
335}
336
337impl<'a> miette::SpanContents<'a> for ScopedSourceFileRef<'a> {
338    #[inline]
339    fn data(&self) -> &'a [u8] {
340        let start = self.span.offset();
341        let end = start + self.span.len();
342        &self.file.as_bytes()[start..end]
343    }
344
345    #[inline]
346    fn span(&self) -> &miette::SourceSpan {
347        &self.span
348    }
349
350    fn line(&self) -> usize {
351        let offset = self.span.offset() as u32;
352        self.file.content.line_index(offset.into()).to_usize()
353    }
354
355    fn column(&self) -> usize {
356        let start = self.span.offset() as u32;
357        let end = start + self.span.len() as u32;
358        let span = SourceSpan::new(self.file.id(), start..end);
359        let loc = self.file.location(span);
360        loc.column.to_index().to_usize()
361    }
362
363    #[inline]
364    fn line_count(&self) -> usize {
365        self.file.line_count()
366    }
367
368    #[inline]
369    fn name(&self) -> Option<&str> {
370        Some(self.file.uri().as_ref())
371    }
372
373    #[inline]
374    fn language(&self) -> Option<&str> {
375        None
376    }
377}
378
379impl miette::SourceCode for SourceFileRef {
380    #[inline]
381    fn read_span<'a>(
382        &'a self,
383        span: &miette::SourceSpan,
384        context_lines_before: usize,
385        context_lines_after: usize,
386    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
387        self.file.read_span(span, context_lines_before, context_lines_after)
388    }
389}
390
391// SOURCE CONTENT
392// ================================================================================================
393
394/// Represents key information about a source file and its content:
395///
396/// * The path to the file (or its name, in the case of virtual files)
397/// * The content of the file
398/// * The byte offsets of every line in the file, for use in looking up line/column information
399#[derive(Clone)]
400#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
401pub struct SourceContent {
402    /// The language identifier for this source file
403    language: Box<str>,
404    /// The path (or name) of this file
405    uri: Uri,
406    /// The underlying content of this file
407    content: String,
408    /// The byte offsets for each line in this file
409    #[cfg_attr(feature = "serde", serde(default, skip))]
410    line_starts: Vec<ByteIndex>,
411    /// The document version
412    #[cfg_attr(feature = "serde", serde(default))]
413    version: i32,
414}
415
416impl fmt::Debug for SourceContent {
417    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
418        let Self {
419            language,
420            uri,
421            content,
422            line_starts,
423            version,
424        } = self;
425        f.debug_struct("SourceContent")
426            .field("version", version)
427            .field("language", language)
428            .field("uri", uri)
429            .field("size_in_bytes", &content.len())
430            .field("line_count", &line_starts.len())
431            .field("content", content)
432            .finish()
433    }
434}
435
436impl Eq for SourceContent {}
437
438impl PartialEq for SourceContent {
439    #[inline]
440    fn eq(&self, other: &Self) -> bool {
441        self.language == other.language && self.uri == other.uri && self.content == other.content
442    }
443}
444
445impl Ord for SourceContent {
446    #[inline]
447    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
448        self.uri.cmp(&other.uri).then_with(|| self.content.cmp(&other.content))
449    }
450}
451
452impl PartialOrd for SourceContent {
453    #[inline]
454    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
455        Some(self.cmp(other))
456    }
457}
458
459impl core::hash::Hash for SourceContent {
460    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
461        self.language.hash(state);
462        self.uri.hash(state);
463        self.content.hash(state);
464    }
465}
466
467#[derive(Debug, thiserror::Error)]
468pub enum SourceContentUpdateError {
469    #[error("invalid content selection: start position of {}:{} is out of bounds", .0.line, .0.character)]
470    InvalidSelectionStart(Position),
471    #[error("invalid content selection: end position of {}:{} is out of bounds", .0.line, .0.character)]
472    InvalidSelectionEnd(Position),
473}
474
475impl SourceContent {
476    /// Create a new [SourceContent] from the (possibly virtual) file path, and its content as a
477    /// UTF-8 string.
478    ///
479    /// When created, the line starts for this file will be computed, which requires scanning the
480    /// file content once.
481    pub fn new(language: impl AsRef<str>, uri: impl Into<Uri>, content: impl Into<String>) -> Self {
482        let language = language.as_ref().to_string().into_boxed_str();
483        let content: String = content.into();
484        let bytes = content.as_bytes();
485
486        assert!(
487            bytes.len() < u32::MAX as usize,
488            "unsupported source file: current maximum supported length in bytes is 2^32"
489        );
490
491        let line_starts = compute_line_starts(&content, None);
492
493        Self {
494            language,
495            uri: uri.into(),
496            content,
497            line_starts,
498            version: 0,
499        }
500    }
501
502    /// Get the language identifier of this source file
503    pub fn language(&self) -> &str {
504        &self.language
505    }
506
507    /// Get the current version of this source file's content
508    pub fn version(&self) -> i32 {
509        self.version
510    }
511
512    /// Set the current version of this content
513    #[inline(always)]
514    pub fn set_version(&mut self, version: i32) {
515        self.version = version;
516    }
517
518    /// Get the URI of this source file
519    #[inline]
520    pub fn uri(&self) -> &Uri {
521        &self.uri
522    }
523
524    /// Returns the underlying content as a string slice
525    #[inline(always)]
526    pub fn as_str(&self) -> &str {
527        self.content.as_ref()
528    }
529
530    /// Returns the underlying content as a byte slice
531    #[inline(always)]
532    pub fn as_bytes(&self) -> &[u8] {
533        self.content.as_bytes()
534    }
535
536    /// Returns the size in bytes of the underlying content
537    #[inline(always)]
538    pub fn len(&self) -> usize {
539        self.content.len()
540    }
541
542    /// Returns true if the underlying content is empty
543    #[inline(always)]
544    pub fn is_empty(&self) -> bool {
545        self.content.is_empty()
546    }
547
548    /// Returns the range of valid byte indices for this file
549    #[inline]
550    pub fn source_range(&self) -> Range<ByteIndex> {
551        ByteIndex(0)..ByteIndex(self.content.len() as u32)
552    }
553
554    /// Returns a subset of the underlying content as a string slice.
555    ///
556    /// The bounds of the given span are byte indices, _not_ character indices.
557    ///
558    /// Returns `None` if the given span is out of bounds, or if the bounds do not
559    /// fall on valid UTF-8 character boundaries.
560    #[inline(always)]
561    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
562        self.as_str().get(span.into())
563    }
564
565    /// Returns a subset of the underlying content as a byte slice.
566    ///
567    /// Returns `None` if the given span is out of bounds
568    #[inline(always)]
569    pub fn byte_slice(&self, span: impl Into<Range<ByteIndex>>) -> Option<&[u8]> {
570        let Range { start, end } = span.into();
571        self.as_bytes().get(start.to_usize()..end.to_usize())
572    }
573
574    /// Like [Self::source_slice], but the slice is computed like a selection in an editor, i.e.
575    /// based on line/column positions, rather than raw character indices.
576    ///
577    /// This is useful when mapping LSP operations to content in the source file.
578    pub fn select(&self, mut range: Selection) -> Option<&str> {
579        range.canonicalize();
580
581        let start = self.line_column_to_offset(range.start.line, range.start.character)?;
582        let end = self.line_column_to_offset(range.end.line, range.end.character)?;
583
584        Some(&self.as_str()[start.to_usize()..end.to_usize()])
585    }
586
587    /// Returns the number of lines in the source content
588    pub fn line_count(&self) -> usize {
589        self.line_starts.len()
590    }
591
592    /// Returns the byte index at which the line corresponding to `line_index` starts
593    ///
594    /// Returns `None` if the given index is out of bounds
595    pub fn line_start(&self, line_index: LineIndex) -> Option<ByteIndex> {
596        self.line_starts.get(line_index.to_usize()).copied()
597    }
598
599    /// Returns the index of the last line in this file
600    pub fn last_line_index(&self) -> LineIndex {
601        LineIndex(self.line_count().saturating_sub(1).try_into().expect("too many lines in file"))
602    }
603
604    /// Get the range of byte indices covered by the given line
605    pub fn line_range(&self, line_index: LineIndex) -> Option<Range<ByteIndex>> {
606        let line_start = self.line_start(line_index)?;
607        match self.line_start(line_index + 1) {
608            Some(line_end) => Some(line_start..line_end),
609            None => Some(line_start..ByteIndex(self.content.len() as u32)),
610        }
611    }
612
613    /// Get the index of the line to which `byte_index` belongs
614    pub fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
615        match self.line_starts.binary_search(&byte_index) {
616            Ok(line) => LineIndex(line as u32),
617            Err(next_line) => LineIndex(next_line as u32 - 1),
618        }
619    }
620
621    /// Get the [ByteIndex] corresponding to the given line and column indices.
622    ///
623    /// Returns `None` if the line or column indices are out of bounds.
624    pub fn line_column_to_offset(
625        &self,
626        line_index: LineIndex,
627        column_index: ColumnIndex,
628    ) -> Option<ByteIndex> {
629        let column_index = column_index.to_usize();
630        let line_span = self.line_range(line_index)?;
631        let line_src = self
632            .content
633            .get(line_span.start.to_usize()..line_span.end.to_usize())
634            .expect("invalid line boundaries: invalid utf-8");
635        if line_src.len() < column_index {
636            return None;
637        }
638        let (pre, _) = line_src.split_at(column_index);
639        let start = line_span.start;
640        Some(start + ByteOffset::from_str_len(pre))
641    }
642
643    /// Get a [FileLineCol] corresponding to the line/column in this file at which `byte_index`
644    /// occurs
645    pub fn location(&self, byte_index: ByteIndex) -> Option<FileLineCol> {
646        let line_index = self.line_index(byte_index);
647        let line_start_index = self.line_start(line_index)?;
648        let line_src = self.content.get(line_start_index.to_usize()..byte_index.to_usize())?;
649        let column_index = ColumnIndex::from(line_src.chars().count() as u32);
650        Some(FileLineCol {
651            uri: self.uri.clone(),
652            line: line_index.number(),
653            column: column_index.number(),
654        })
655    }
656
657    /// Update the source document after being notified of a change event.
658    ///
659    /// The `version` indicates the new version of the document
660    ///
661    /// NOTE: This is intended to update a [super::SourceManager]'s view of the content of the
662    /// document, _not_ to perform an update against the actual file, wherever it may be.
663    pub fn update(
664        &mut self,
665        text: String,
666        range: Option<Selection>,
667        version: i32,
668    ) -> Result<(), SourceContentUpdateError> {
669        match range {
670            Some(range) => {
671                let start = self
672                    .line_column_to_offset(range.start.line, range.start.character)
673                    .ok_or(SourceContentUpdateError::InvalidSelectionStart(range.start))?
674                    .to_usize();
675                let end = self
676                    .line_column_to_offset(range.end.line, range.end.character)
677                    .ok_or(SourceContentUpdateError::InvalidSelectionEnd(range.end))?
678                    .to_usize();
679                assert!(start <= end, "start of range must be less than end, got {start}..{end}",);
680                self.content.replace_range(start..end, &text);
681
682                let added_line_starts = compute_line_starts(&text, Some(start as u32));
683                let num_added = added_line_starts.len();
684                let splice_start = range.start.line.to_usize() + 1;
685                // Determine deletion range in line_starts to respect Selection semantics.
686                // For multi-line edits, remove line starts from (start.line + 1) up to end.line
687                // inclusive, since all intervening newlines are removed by the
688                // replacement, regardless of end.character.
689                enum Deletion {
690                    Empty,
691                    Inclusive(usize), // inclusive end index
692                }
693                let deletion = if range.start.line == range.end.line {
694                    Deletion::Empty
695                } else {
696                    let mut end_line_for_splice = range.end.line.to_usize();
697                    if !self.line_starts.is_empty() {
698                        let max_idx = self.line_starts.len() - 1;
699                        if end_line_for_splice > max_idx {
700                            end_line_for_splice = max_idx;
701                        }
702                    }
703                    if end_line_for_splice >= splice_start {
704                        Deletion::Inclusive(end_line_for_splice)
705                    } else {
706                        Deletion::Empty
707                    }
708                };
709
710                match deletion {
711                    Deletion::Empty => {
712                        self.line_starts.splice(splice_start..splice_start, added_line_starts);
713                    },
714                    Deletion::Inclusive(end_idx) => {
715                        self.line_starts.splice(splice_start..=end_idx, added_line_starts);
716                    },
717                }
718
719                let diff =
720                    (text.len() as i32).saturating_sub_unsigned((end as u32) - (start as u32));
721                if diff != 0 {
722                    for i in (splice_start + num_added)..self.line_starts.len() {
723                        self.line_starts[i] =
724                            ByteIndex(self.line_starts[i].to_u32().saturating_add_signed(diff));
725                    }
726                }
727            },
728            None => {
729                self.line_starts = compute_line_starts(&text, None);
730                self.content = text;
731            },
732        }
733
734        self.version = version;
735
736        Ok(())
737    }
738}
739
#[cfg(feature = "serde")]
impl SourceContent {
    /// Deserializes a [SourceContent], then rebuilds its line-start table from the deserialized
    /// text, since that table is not part of the serialized form.
    fn deserialize_and_recompute_line_starts<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let parsed = SourceContent::deserialize(deserializer)?;
        let line_starts = compute_line_starts(&parsed.content, None);
        Ok(Self { line_starts, ..parsed })
    }
}
751
752fn compute_line_starts(text: &str, text_offset: Option<u32>) -> Vec<ByteIndex> {
753    let bytes = text.as_bytes();
754    let initial_line_offset = match text_offset {
755        Some(_) => None,
756        None => Some(ByteIndex(0)),
757    };
758    let text_offset = text_offset.unwrap_or(0);
759    initial_line_offset
760        .into_iter()
761        .chain(memchr::memchr_iter(b'\n', bytes).filter_map(|mut offset| {
762            // Determine if the newline has any preceding escapes
763            let mut preceding_escapes = 0;
764            let line_start = offset + 1;
765            while let Some(prev_offset) = offset.checked_sub(1) {
766                if bytes[prev_offset] == b'\\' {
767                    offset = prev_offset;
768                    preceding_escapes += 1;
769                    continue;
770                }
771                break;
772            }
773
774            // If the newline is escaped, do not count it as a new line
775            let is_escaped = preceding_escapes > 0 && preceding_escapes % 2 != 0;
776            if is_escaped {
777                None
778            } else {
779                Some(ByteIndex(text_offset + line_start as u32))
780            }
781        }))
782        .collect()
783}
784
785// SOURCE CONTENT INDICES
786// ================================================================================================
787
/// A position in a source file, expressed as a number of bytes from its start
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
pub struct ByteIndex(pub u32);

impl ByteIndex {
    /// Wraps a raw `u32` index in a [ByteIndex]
    pub const fn new(index: u32) -> Self {
        ByteIndex(index)
    }

    /// Returns the raw index, widened to a usize
    #[inline(always)]
    pub const fn to_usize(self) -> usize {
        self.to_u32() as usize
    }

    /// Returns the raw index
    #[inline(always)]
    pub const fn to_u32(self) -> u32 {
        let Self(index) = self;
        index
    }
}
812
813impl core::ops::Add<ByteOffset> for ByteIndex {
814    type Output = ByteIndex;
815
816    fn add(self, rhs: ByteOffset) -> Self {
817        Self((self.0 as i64 + rhs.0) as u32)
818    }
819}
820
821impl core::ops::Add<u32> for ByteIndex {
822    type Output = ByteIndex;
823
824    fn add(self, rhs: u32) -> Self {
825        Self(self.0 + rhs)
826    }
827}
828
829impl core::ops::AddAssign<ByteOffset> for ByteIndex {
830    fn add_assign(&mut self, rhs: ByteOffset) {
831        *self = *self + rhs;
832    }
833}
834
835impl core::ops::AddAssign<u32> for ByteIndex {
836    fn add_assign(&mut self, rhs: u32) {
837        self.0 += rhs;
838    }
839}
840
841impl core::ops::Sub<ByteOffset> for ByteIndex {
842    type Output = ByteIndex;
843
844    fn sub(self, rhs: ByteOffset) -> Self {
845        Self((self.0 as i64 - rhs.0) as u32)
846    }
847}
848
849impl core::ops::Sub<u32> for ByteIndex {
850    type Output = ByteIndex;
851
852    fn sub(self, rhs: u32) -> Self {
853        Self(self.0 - rhs)
854    }
855}
856
857impl core::ops::SubAssign<ByteOffset> for ByteIndex {
858    fn sub_assign(&mut self, rhs: ByteOffset) {
859        *self = *self - rhs;
860    }
861}
862
863impl core::ops::SubAssign<u32> for ByteIndex {
864    fn sub_assign(&mut self, rhs: u32) {
865        self.0 -= rhs;
866    }
867}
868
869impl From<u32> for ByteIndex {
870    fn from(index: u32) -> Self {
871        Self(index)
872    }
873}
874
875impl From<ByteIndex> for u32 {
876    fn from(index: ByteIndex) -> Self {
877        index.0
878    }
879}
880
881impl fmt::Display for ByteIndex {
882    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
883        fmt::Display::fmt(&self.0, f)
884    }
885}
886
/// A signed distance in bytes, relative to some [ByteIndex]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(i64);

impl ByteOffset {
    /// Returns the offset spanned by the UTF-8 encoding of the given `char`
    pub fn from_char_len(c: char) -> ByteOffset {
        let width = c.len_utf8();
        ByteOffset(width as i64)
    }

    /// Returns the offset spanned by the given `str`, i.e. its length in bytes
    pub fn from_str_len(s: &str) -> ByteOffset {
        let width = s.len();
        ByteOffset(width as i64)
    }
}
902
903impl core::ops::Add for ByteOffset {
904    type Output = ByteOffset;
905
906    fn add(self, rhs: Self) -> Self {
907        Self(self.0 + rhs.0)
908    }
909}
910
911impl core::ops::AddAssign for ByteOffset {
912    fn add_assign(&mut self, rhs: Self) {
913        self.0 += rhs.0;
914    }
915}
916
917impl core::ops::Sub for ByteOffset {
918    type Output = ByteOffset;
919
920    fn sub(self, rhs: Self) -> Self {
921        Self(self.0 - rhs.0)
922    }
923}
924
925impl core::ops::SubAssign for ByteOffset {
926    fn sub_assign(&mut self, rhs: Self) {
927        self.0 -= rhs.0;
928    }
929}
930
931impl fmt::Display for ByteOffset {
932    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
933        fmt::Display::fmt(&self.0, f)
934    }
935}
936
/// Declares a pair of newtypes for the same quantity: a zero-based `*Index` wrapping a `u32`, and
/// a one-based `*Number` wrapping a `NonZeroU32`, together with conversions between the two,
/// checked/saturating arithmetic helpers, operator impls, and `Display`.
///
/// The two-argument form derives the type names by appending `Index` and `Number` to `$name`;
/// the three-argument form takes both type names explicitly.
macro_rules! declare_dual_number_and_index_type {
    ($name:ident, $description:literal) => {
        paste::paste! {
            declare_dual_number_and_index_type!([<$name Index>], [<$name Number>], $description);
        }
    };

    ($index_name:ident, $number_name:ident, $description:literal) => {
        #[doc = concat!("A zero-indexed ", $description, " number")]
        #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $index_name(pub u32);

        impl $index_name {
            #[doc = concat!("Convert to a [", stringify!($number_name), "]")]
            ///
            /// # Panics
            ///
            /// Panics if the index is `u32::MAX`, as the corresponding one-indexed number is not
            /// representable.
            pub const fn number(self) -> $number_name {
                // `wrapping_add` yields zero only when the addition overflowed, which
                // `NonZeroU32::new` reports as `None`; this never constructs a zero `NonZeroU32`,
                // regardless of whether overflow checks are enabled.
                match NonZeroU32::new(self.0.wrapping_add(1)) {
                    Some(number) => $number_name(number),
                    None => panic!("invalid index: overflow occurred"),
                }
            }

            /// Get the raw index value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0 as usize
            }

            /// Get the raw index value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0
            }

            /// Add `offset` to this index, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this index, returning `None` on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.checked_add_signed(offset).map(Self)
            }

            /// Subtract `offset` from this index, returning `None` on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.checked_sub(offset).map(Self)
            }

            /// Add `offset` to this index, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this index, saturating to `0` on underflow, and `u32::MAX`
            /// on overflow.
            pub const fn saturating_add_signed(self, offset: i32) -> Self {
                Self(self.0.saturating_add_signed(offset))
            }

            /// Subtract `offset` from this index, saturating to `0` on underflow
            pub const fn saturating_sub(self, offset: u32) -> Self {
                Self(self.0.saturating_sub(offset))
            }
        }

        impl From<u32> for $index_name {
            #[inline]
            fn from(index: u32) -> Self {
                Self(index)
            }
        }

        impl From<$number_name> for $index_name {
            #[inline]
            fn from(index: $number_name) -> Self {
                // A number is always >= 1, so this subtraction cannot underflow.
                Self(index.to_u32() - 1)
            }
        }

        impl core::ops::Add<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn add(self, rhs: u32) -> Self {
                Self(self.0 + rhs)
            }
        }

        impl core::ops::AddAssign<u32> for $index_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $index_name {
            type Output = Self;

            /// Panics if the result is not representable as a `u32`.
            fn add(self, rhs: i32) -> Self {
                self.checked_add_signed(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $index_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: u32) -> Self {
                Self(self.0 - rhs)
            }
        }

        impl core::ops::SubAssign<u32> for $index_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $index_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }

        #[doc = concat!("A one-indexed ", $description, " number")]
        #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $number_name(NonZeroU32);

        impl Default for $number_name {
            /// The default number is `1`, i.e. the first valid one-indexed value.
            fn default() -> Self {
                // SAFETY: 1 is non-zero.
                Self(unsafe { NonZeroU32::new_unchecked(1) })
            }
        }

        impl $number_name {
            /// Create a new number from a raw `u32`, returning `None` if `number` is zero
            pub const fn new(number: u32) -> Option<Self> {
                match NonZeroU32::new(number) {
                    Some(num) => Some(Self(num)),
                    None => None,
                }
            }

            #[doc = concat!("Convert to a [", stringify!($index_name), "]")]
            pub const fn to_index(self) -> $index_name {
                $index_name(self.to_u32().saturating_sub(1))
            }

            /// Get the raw value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0.get() as usize
            }

            /// Get the raw value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0.get()
            }

            /// Add `offset` to this number, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this number, returning `None` if the result would be
            /// less than `1` or greater than `u32::MAX`
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.get().checked_add_signed(offset).and_then(Self::new)
            }

            /// Subtract `offset` from this number, returning `None` if the result would be less
            /// than `1`
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.get().checked_sub(offset).and_then(Self::new)
            }

            /// Add `offset` to this number, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                // SAFETY: the left operand is non-zero and a saturating addition never produces
                // a value smaller than its left operand, so the result is non-zero.
                Self(unsafe { NonZeroU32::new_unchecked(self.0.get().saturating_add(offset)) })
            }

            /// Add a signed `offset` to this number, saturating to `1` on underflow, and
            /// `u32::MAX` on overflow.
            pub fn saturating_add_signed(self, offset: i32) -> Self {
                Self::new(self.to_u32().saturating_add_signed(offset)).unwrap_or_default()
            }

            /// Subtract `offset` from this number, saturating to `1` on underflow
            pub fn saturating_sub(self, offset: u32) -> Self {
                Self::new(self.to_u32().saturating_sub(offset)).unwrap_or_default()
            }
        }

        impl From<NonZeroU32> for $number_name {
            #[inline]
            fn from(index: NonZeroU32) -> Self {
                Self(index)
            }
        }

        impl From<$index_name> for $number_name {
            /// Panics if `index` is `u32::MAX`, as the corresponding number is not representable.
            #[inline]
            fn from(index: $index_name) -> Self {
                index.number()
            }
        }

        impl core::ops::Add<u32> for $number_name {
            type Output = Self;

            /// Panics if the result is not representable as a `u32`.
            #[inline]
            fn add(self, rhs: u32) -> Self {
                // Checked rather than `new_unchecked(a + b)`: with overflow checks disabled the
                // sum could wrap to zero, which must never be stored in a `NonZeroU32`.
                Self(self.0.checked_add(rhs).expect("invalid offset: overflow occurred"))
            }
        }

        impl core::ops::AddAssign<u32> for $number_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $number_name {
            type Output = Self;

            /// Panics if the result would be less than `1` or greater than `u32::MAX`.
            fn add(self, rhs: i32) -> Self {
                self.to_u32()
                    .checked_add_signed(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $number_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $number_name {
            type Output = Self;

            /// Panics if the result would be less than `1`.
            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.to_u32()
                    .checked_sub(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $number_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $number_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
1211
1212declare_dual_number_and_index_type!(Line, "line");
1213declare_dual_number_and_index_type!(Column, "column");
1214
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that the byte range of line `$line` in `$content` holds exactly the bytes of the
    // string `$expected`.
    macro_rules! assert_line_bytes {
        ($content:expr, $line:expr, $expected:expr) => {
            assert_eq!(
                $content
                    .byte_slice($content.line_range($line).expect("invalid line"))
                    .expect("invalid byte range"),
                $expected.as_bytes()
            )
        };
    }

    /// Line ranges computed for freshly-created content cover each line including its newline,
    /// and the line following the final newline is empty.
    #[test]
    fn source_content_line_starts() {
        const CONTENT: &str = "\
begin
  push.1
  push.2
  add
end
";
        let content = SourceContent::new("masm", "foo.masm", CONTENT);

        assert_eq!(content.line_count(), 6);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(1), "  push.1\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }

    /// Line ranges are still correct after the content has been modified via `update`.
    #[test]
    fn source_content_line_starts_after_update() {
        const CONTENT: &str = "\
begin
  push.1
  push.2
  add
end
";
        const FRAGMENT: &str = "  push.2
  mul
end
";
        const UPDATED: &str = "\
begin
  push.1
  push.2
  add
  push.2
  mul
end
";
        let mut content = SourceContent::new("masm", "foo.masm", CONTENT);
        let selection = Selection::from(LineIndex(4)..LineIndex(5));
        content
            .update(FRAGMENT.to_string(), Some(selection), 1)
            .expect("update failed");

        assert_eq!(content.as_str(), UPDATED);
        assert_eq!(content.line_count(), 8);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(3), "  add\n");
        assert_line_bytes!(content, LineIndex(4), "  push.2\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }
}