Skip to main content

miden_debug_types/
source_file.rs

1use alloc::{
2    boxed::Box,
3    string::{String, ToString},
4    sync::Arc,
5    vec::Vec,
6};
7use core::{fmt, num::NonZeroU32, ops::Range};
8
9#[cfg(feature = "serde")]
10use serde::{Deserialize, Serialize};
11
12use super::{FileLineCol, Position, Selection, SourceId, SourceSpan, Uri};
13
14// SOURCE LANGUAGE
15// ================================================================================================
16
/// Identifies the language a source file is written in.
///
/// Each variant maps to a short textual identifier, available via [`AsRef<str>`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SourceLanguage {
    /// A source file whose language identifier is `"masm"`
    Masm,
    /// A source file whose language identifier is `"rust"`
    Rust,
    /// A source file in some other language, carrying its own identifier
    Other(&'static str),
}

impl AsRef<str> for SourceLanguage {
    /// Returns the textual identifier of this language.
    fn as_ref(&self) -> &str {
        match *self {
            Self::Other(identifier) => identifier,
            Self::Rust => "rust",
            Self::Masm => "masm",
        }
    }
}
33
34// SOURCE FILE
35// ================================================================================================
36
/// A [SourceFile] represents a single file stored in a [super::SourceManager]
///
/// It pairs the [SourceId] under which the file is known with the [SourceContent] holding the
/// file's URI, language, text and line table.
///
/// NOTE: the derived comparison and hashing traits operate on the fields in declaration order,
/// i.e. on `id` first and `content` second.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct SourceFile {
    /// The unique identifier allocated for this [SourceFile] by its owning [super::SourceManager]
    id: SourceId,
    /// The file content
    ///
    /// The line table of [SourceContent] is skipped during serialization, so deserialization is
    /// routed through `SourceContent::deserialize_and_recompute_line_starts`, which rebuilds it
    /// from the deserialized text.
    #[cfg_attr(
        feature = "serde",
        serde(deserialize_with = "SourceContent::deserialize_and_recompute_line_starts")
    )]
    content: SourceContent,
}
50
51impl miette::SourceCode for SourceFile {
52    fn read_span<'a>(
53        &'a self,
54        span: &miette::SourceSpan,
55        context_lines_before: usize,
56        context_lines_after: usize,
57    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
58        let mut start =
59            u32::try_from(span.offset()).map_err(|_| miette::MietteError::OutOfBounds)?;
60        let len = u32::try_from(span.len()).map_err(|_| miette::MietteError::OutOfBounds)?;
61        let mut end = start.checked_add(len).ok_or(miette::MietteError::OutOfBounds)?;
62        if context_lines_before > 0 {
63            let line_index = self.content.line_index(start.into());
64            let start_line_index = line_index.saturating_sub(context_lines_before as u32);
65            start = self.content.line_start(start_line_index).map(|idx| idx.to_u32()).unwrap_or(0);
66        }
67        if context_lines_after > 0 {
68            let line_index = self.content.line_index(end.into());
69            let end_line_index = line_index
70                .checked_add(context_lines_after as u32)
71                .ok_or(miette::MietteError::OutOfBounds)?;
72            end = self
73                .content
74                .line_range(end_line_index)
75                .map(|range| range.end.to_u32())
76                .unwrap_or_else(|| self.content.source_range().end.to_u32());
77        }
78        Ok(Box::new(ScopedSourceFileRef {
79            file: self,
80            span: miette::SourceSpan::new((start as usize).into(), end.abs_diff(start) as usize),
81        }))
82    }
83}
84
85impl SourceFile {
86    /// Create a new [SourceFile] from its raw components
87    pub fn new(id: SourceId, lang: SourceLanguage, uri: Uri, content: impl Into<Box<str>>) -> Self {
88        let content = SourceContent::new(lang, uri, content.into());
89        Self { id, content }
90    }
91
92    /// This function is intended for use by [super::SourceManager] implementations that need to
93    /// construct a [SourceFile] from its raw components (i.e. the identifier for the source file
94    /// and its content).
95    ///
96    /// Since the only entity that should be constructing a [SourceId] is a [super::SourceManager],
97    /// it is only valid to call this function in one of two scenarios:
98    ///
99    /// 1. You are a [super::SourceManager] constructing a [SourceFile] after allocating a
100    ///    [SourceId]
101    /// 2. You pass [`SourceId::default()`], i.e. [`SourceId::UNKNOWN`] for the source identifier.
102    ///    The resulting [SourceFile] will be valid and safe to use in a context where there isn't a
103    ///    [super::SourceManager] present. If there is a source manager in use, then constructing
104    ///    detached [SourceFile]s is _not_ recommended, because it will make it confusing to
105    ///    determine whether a given [SourceFile] reference is safe to use.
106    ///
107    /// You should rarely, if ever, fall in camp 2 - but it can be handy in some narrow cases
108    pub fn from_raw_parts(id: SourceId, content: SourceContent) -> Self {
109        Self { id, content }
110    }
111
112    /// Get the [SourceId] associated with this file
113    pub const fn id(&self) -> SourceId {
114        self.id
115    }
116
117    /// Get the name of this source file
118    pub fn uri(&self) -> &Uri {
119        self.content.uri()
120    }
121
122    /// Returns a reference to the underlying [SourceContent]
123    pub fn content(&self) -> &SourceContent {
124        &self.content
125    }
126
127    /// Returns a mutable reference to the underlying [SourceContent]
128    pub fn content_mut(&mut self) -> &mut SourceContent {
129        &mut self.content
130    }
131
132    /// Returns the number of lines in this file
133    pub fn line_count(&self) -> usize {
134        self.content.line_starts.len()
135    }
136
137    /// Returns the number of bytes in this file
138    pub fn len(&self) -> usize {
139        self.content.len()
140    }
141
142    /// Returns true if this file is empty
143    pub fn is_empty(&self) -> bool {
144        self.content.is_empty()
145    }
146
147    /// Get the underlying content of this file
148    #[inline(always)]
149    pub fn as_str(&self) -> &str {
150        self.content.as_str()
151    }
152
153    /// Get the underlying content of this file as a byte slice
154    #[inline(always)]
155    pub fn as_bytes(&self) -> &[u8] {
156        self.content.as_bytes()
157    }
158
159    /// Returns a [SourceSpan] covering the entirety of this file
160    #[inline]
161    pub fn source_span(&self) -> SourceSpan {
162        let range = self.content.source_range();
163        SourceSpan::new(self.id, range.start.0..range.end.0)
164    }
165
166    /// Returns a subset of the underlying content as a string slice.
167    ///
168    /// The bounds of the given span are byte indices, _not_ character indices.
169    ///
170    /// Returns `None` if the given span is out of bounds, or if the bounds do not
171    /// fall on valid UTF-8 character boundaries.
172    #[inline(always)]
173    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
174        self.content.source_slice(span)
175    }
176
177    /// Returns a [SourceFileRef] corresponding to the bytes contained in the specified span.
178    pub fn slice(self: &Arc<Self>, span: impl Into<Range<u32>>) -> SourceFileRef {
179        SourceFileRef::new(Arc::clone(self), span)
180    }
181
182    /// Get a [SourceSpan] which points to the first byte of the character at `column` on `line`
183    ///
184    /// Returns `None` if the given line/column is out of bounds for this file.
185    pub fn line_column_to_span(
186        &self,
187        line: LineNumber,
188        column: ColumnNumber,
189    ) -> Option<SourceSpan> {
190        let offset = self.content.line_column_to_offset(line.into(), column.into())?;
191        Some(SourceSpan::at(self.id, offset.0))
192    }
193
194    /// Get a [FileLineCol] equivalent to the start of the given [SourceSpan]
195    pub fn location(&self, span: SourceSpan) -> FileLineCol {
196        assert_eq!(span.source_id(), self.id, "mismatched source ids");
197
198        self.content
199            .location(ByteIndex(span.into_range().start))
200            .expect("invalid source span: starting byte is out of bounds")
201    }
202}
203
204impl AsRef<str> for SourceFile {
205    #[inline(always)]
206    fn as_ref(&self) -> &str {
207        self.as_str()
208    }
209}
210
211impl AsRef<[u8]> for SourceFile {
212    #[inline(always)]
213    fn as_ref(&self) -> &[u8] {
214        self.as_bytes()
215    }
216}
217
218// SOURCE FILE REF
219// ================================================================================================
220
221/// A reference to a specific spanned region of a [SourceFile], that provides access to the actual
222/// [SourceFile], but scoped to the span it was created with.
223///
224/// This is useful in error types that implement [miette::Diagnostic], as it contains all of the
225/// data necessary to render the source code being referenced, without a [super::SourceManager] on
226/// hand.
227#[derive(Debug, Clone)]
228pub struct SourceFileRef {
229    file: Arc<SourceFile>,
230    span: SourceSpan,
231}
232
233impl SourceFileRef {
234    /// Create a [SourceFileRef] from a [SourceFile] and desired span (in bytes)
235    ///
236    /// The given span will be constrained to the bytes of `file`, so a span that reaches out of
237    /// bounds will have its end bound set to the last byte of the file.
238    pub fn new(file: Arc<SourceFile>, span: impl Into<Range<u32>>) -> Self {
239        let span = span.into();
240        let end = core::cmp::min(span.end, file.len() as u32);
241        let span = SourceSpan::new(file.id(), span.start..end);
242        Self { file, span }
243    }
244
245    /// Returns a ref-counted handle to the underlying [SourceFile]
246    pub fn source_file(&self) -> Arc<SourceFile> {
247        self.file.clone()
248    }
249
250    /// Returns the URI of the file this [SourceFileRef] is selecting
251    pub fn uri(&self) -> &Uri {
252        self.file.uri()
253    }
254
255    /// Returns the [SourceSpan] selected by this [SourceFileRef]
256    pub const fn span(&self) -> SourceSpan {
257        self.span
258    }
259
260    /// Returns the underlying `str` selected by this [SourceFileRef]
261    pub fn as_str(&self) -> &str {
262        self.file.source_slice(self.span).unwrap()
263    }
264
265    /// Returns the underlying bytes selected by this [SourceFileRef]
266    #[inline]
267    pub fn as_bytes(&self) -> &[u8] {
268        self.as_str().as_bytes()
269    }
270
271    /// Returns the number of bytes represented by the subset of the underlying file that is covered
272    /// by this [SourceFileRef]
273    pub fn len(&self) -> usize {
274        self.span.len()
275    }
276
277    /// Returns true if this selection is empty
278    pub fn is_empty(&self) -> bool {
279        self.len() == 0
280    }
281}
282
283impl Eq for SourceFileRef {}
284
285impl PartialEq for SourceFileRef {
286    fn eq(&self, other: &Self) -> bool {
287        self.as_str() == other.as_str()
288    }
289}
290
291impl Ord for SourceFileRef {
292    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
293        self.as_str().cmp(other.as_str())
294    }
295}
296
297impl PartialOrd for SourceFileRef {
298    #[inline(always)]
299    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
300        Some(self.cmp(other))
301    }
302}
303
304impl core::hash::Hash for SourceFileRef {
305    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
306        self.as_str().hash(state);
307    }
308}
309
310impl AsRef<str> for SourceFileRef {
311    #[inline(always)]
312    fn as_ref(&self) -> &str {
313        self.as_str()
314    }
315}
316
317impl AsRef<[u8]> for SourceFileRef {
318    #[inline(always)]
319    fn as_ref(&self) -> &[u8] {
320        self.as_bytes()
321    }
322}
323
324impl From<&SourceFileRef> for miette::SourceSpan {
325    fn from(source: &SourceFileRef) -> Self {
326        source.span.into()
327    }
328}
329
330/// Used to implement [miette::SpanContents] for [SourceFile] and [SourceFileRef]
331struct ScopedSourceFileRef<'a> {
332    file: &'a SourceFile,
333    span: miette::SourceSpan,
334}
335
336impl<'a> miette::SpanContents<'a> for ScopedSourceFileRef<'a> {
337    #[inline]
338    fn data(&self) -> &'a [u8] {
339        let start = self.span.offset();
340        let end = start + self.span.len();
341        &self.file.as_bytes()[start..end]
342    }
343
344    #[inline]
345    fn span(&self) -> &miette::SourceSpan {
346        &self.span
347    }
348
349    fn line(&self) -> usize {
350        let offset = self.span.offset() as u32;
351        self.file.content.line_index(offset.into()).to_usize()
352    }
353
354    fn column(&self) -> usize {
355        let start = self.span.offset() as u32;
356        let end = start + self.span.len() as u32;
357        let span = SourceSpan::new(self.file.id(), start..end);
358        let loc = self.file.location(span);
359        loc.column.to_index().to_usize()
360    }
361
362    #[inline]
363    fn line_count(&self) -> usize {
364        self.file.line_count()
365    }
366
367    #[inline]
368    fn name(&self) -> Option<&str> {
369        Some(self.file.uri().as_ref())
370    }
371
372    #[inline]
373    fn language(&self) -> Option<&str> {
374        None
375    }
376}
377
378impl miette::SourceCode for SourceFileRef {
379    #[inline]
380    fn read_span<'a>(
381        &'a self,
382        span: &miette::SourceSpan,
383        context_lines_before: usize,
384        context_lines_after: usize,
385    ) -> Result<alloc::boxed::Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
386        self.file.read_span(span, context_lines_before, context_lines_after)
387    }
388}
389
390// SOURCE CONTENT
391// ================================================================================================
392
393/// Represents key information about a source file and its content:
394///
395/// * The path to the file (or its name, in the case of virtual files)
396/// * The content of the file
397/// * The byte offsets of every line in the file, for use in looking up line/column information
398#[derive(Clone)]
399#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
400pub struct SourceContent {
401    /// The language identifier for this source file
402    language: Box<str>,
403    /// The path (or name) of this file
404    uri: Uri,
405    /// The underlying content of this file
406    content: String,
407    /// The byte offsets for each line in this file
408    #[cfg_attr(feature = "serde", serde(default, skip))]
409    line_starts: Vec<ByteIndex>,
410    /// The document version
411    #[cfg_attr(feature = "serde", serde(default))]
412    version: i32,
413}
414
415impl fmt::Debug for SourceContent {
416    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
417        let Self {
418            language,
419            uri,
420            content,
421            line_starts,
422            version,
423        } = self;
424        f.debug_struct("SourceContent")
425            .field("version", version)
426            .field("language", language)
427            .field("uri", uri)
428            .field("size_in_bytes", &content.len())
429            .field("line_count", &line_starts.len())
430            .field("content", content)
431            .finish()
432    }
433}
434
435impl Eq for SourceContent {}
436
437impl PartialEq for SourceContent {
438    #[inline]
439    fn eq(&self, other: &Self) -> bool {
440        self.language == other.language && self.uri == other.uri && self.content == other.content
441    }
442}
443
444impl Ord for SourceContent {
445    #[inline]
446    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
447        self.uri.cmp(&other.uri).then_with(|| self.content.cmp(&other.content))
448    }
449}
450
451impl PartialOrd for SourceContent {
452    #[inline]
453    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
454        Some(self.cmp(other))
455    }
456}
457
458impl core::hash::Hash for SourceContent {
459    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
460        self.language.hash(state);
461        self.uri.hash(state);
462        self.content.hash(state);
463    }
464}
465
/// The error returned by [SourceContent::update] when the selection to be replaced cannot be
/// resolved to byte offsets in the current content.
#[derive(Debug, thiserror::Error)]
pub enum SourceContentUpdateError {
    /// The start position of the selection is out of bounds for the content
    #[error("invalid content selection: start position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionStart(Position),
    /// The end position of the selection is out of bounds for the content
    #[error("invalid content selection: end position of {}:{} is out of bounds", .0.line, .0.character)]
    InvalidSelectionEnd(Position),
}
473
474impl SourceContent {
475    /// Create a new [SourceContent] from the (possibly virtual) file path, and its content as a
476    /// UTF-8 string.
477    ///
478    /// When created, the line starts for this file will be computed, which requires scanning the
479    /// file content once.
480    pub fn new(language: impl AsRef<str>, uri: impl Into<Uri>, content: impl Into<String>) -> Self {
481        let language = language.as_ref().to_string().into_boxed_str();
482        let content: String = content.into();
483        let bytes = content.as_bytes();
484
485        assert!(
486            bytes.len() < u32::MAX as usize,
487            "unsupported source file: current maximum supported length in bytes is 2^32"
488        );
489
490        let line_starts = compute_line_starts(&content, None);
491
492        Self {
493            language,
494            uri: uri.into(),
495            content,
496            line_starts,
497            version: 0,
498        }
499    }
500
501    /// Get the language identifier of this source file
502    pub fn language(&self) -> &str {
503        &self.language
504    }
505
506    /// Get the current version of this source file's content
507    pub fn version(&self) -> i32 {
508        self.version
509    }
510
511    /// Set the current version of this content
512    #[inline(always)]
513    pub fn set_version(&mut self, version: i32) {
514        self.version = version;
515    }
516
517    /// Get the URI of this source file
518    #[inline]
519    pub fn uri(&self) -> &Uri {
520        &self.uri
521    }
522
523    /// Returns the underlying content as a string slice
524    #[inline(always)]
525    pub fn as_str(&self) -> &str {
526        self.content.as_ref()
527    }
528
529    /// Returns the underlying content as a byte slice
530    #[inline(always)]
531    pub fn as_bytes(&self) -> &[u8] {
532        self.content.as_bytes()
533    }
534
535    /// Returns the size in bytes of the underlying content
536    #[inline(always)]
537    pub fn len(&self) -> usize {
538        self.content.len()
539    }
540
541    /// Returns true if the underlying content is empty
542    #[inline(always)]
543    pub fn is_empty(&self) -> bool {
544        self.content.is_empty()
545    }
546
547    /// Returns the range of valid byte indices for this file
548    #[inline]
549    pub fn source_range(&self) -> Range<ByteIndex> {
550        ByteIndex(0)..ByteIndex(self.content.len() as u32)
551    }
552
553    /// Returns a subset of the underlying content as a string slice.
554    ///
555    /// The bounds of the given span are byte indices, _not_ character indices.
556    ///
557    /// Returns `None` if the given span is out of bounds, or if the bounds do not
558    /// fall on valid UTF-8 character boundaries.
559    #[inline(always)]
560    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
561        self.as_str().get(span.into())
562    }
563
564    /// Returns a subset of the underlying content as a byte slice.
565    ///
566    /// Returns `None` if the given span is out of bounds
567    #[inline(always)]
568    pub fn byte_slice(&self, span: impl Into<Range<ByteIndex>>) -> Option<&[u8]> {
569        let Range { start, end } = span.into();
570        self.as_bytes().get(start.to_usize()..end.to_usize())
571    }
572
573    /// Like [Self::source_slice], but the slice is computed like a selection in an editor, i.e.
574    /// based on line/column positions, rather than raw character indices.
575    ///
576    /// This is useful when mapping LSP operations to content in the source file.
577    pub fn select(&self, mut range: Selection) -> Option<&str> {
578        range.canonicalize();
579
580        let start = self.line_column_to_offset(range.start.line, range.start.character)?;
581        let end = self.line_column_to_offset(range.end.line, range.end.character)?;
582
583        Some(&self.as_str()[start.to_usize()..end.to_usize()])
584    }
585
586    /// Returns the number of lines in the source content
587    pub fn line_count(&self) -> usize {
588        self.line_starts.len()
589    }
590
591    /// Returns the byte index at which the line corresponding to `line_index` starts
592    ///
593    /// Returns `None` if the given index is out of bounds
594    pub fn line_start(&self, line_index: LineIndex) -> Option<ByteIndex> {
595        self.line_starts.get(line_index.to_usize()).copied()
596    }
597
598    /// Returns the index of the last line in this file
599    pub fn last_line_index(&self) -> LineIndex {
600        LineIndex(self.line_count().saturating_sub(1).try_into().expect("too many lines in file"))
601    }
602
603    /// Get the range of byte indices covered by the given line
604    pub fn line_range(&self, line_index: LineIndex) -> Option<Range<ByteIndex>> {
605        let line_start = self.line_start(line_index)?;
606        match self.line_start(line_index + 1) {
607            Some(line_end) => Some(line_start..line_end),
608            None => Some(line_start..ByteIndex(self.content.len() as u32)),
609        }
610    }
611
612    /// Get the index of the line to which `byte_index` belongs
613    pub fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
614        match self.line_starts.binary_search(&byte_index) {
615            Ok(line) => LineIndex(line as u32),
616            Err(next_line) => LineIndex(next_line as u32 - 1),
617        }
618    }
619
620    /// Get the [ByteIndex] corresponding to the given line and column indices.
621    ///
622    /// Returns `None` if the line or column indices are out of bounds.
623    pub fn line_column_to_offset(
624        &self,
625        line_index: LineIndex,
626        column_index: ColumnIndex,
627    ) -> Option<ByteIndex> {
628        let column_index = column_index.to_usize();
629        let line_span = self.line_range(line_index)?;
630        let line_src = self
631            .content
632            .get(line_span.start.to_usize()..line_span.end.to_usize())
633            .expect("invalid line boundaries: invalid utf-8");
634        if line_src.len() < column_index {
635            return None;
636        }
637        let (pre, _) = line_src.split_at(column_index);
638        let start = line_span.start;
639        Some(start + ByteOffset::from_str_len(pre))
640    }
641
642    /// Get a [FileLineCol] corresponding to the line/column in this file at which `byte_index`
643    /// occurs
644    pub fn location(&self, byte_index: ByteIndex) -> Option<FileLineCol> {
645        let line_index = self.line_index(byte_index);
646        let line_start_index = self.line_start(line_index)?;
647        let line_src = self.content.get(line_start_index.to_usize()..byte_index.to_usize())?;
648        let column_index = ColumnIndex::from(line_src.chars().count() as u32);
649        Some(FileLineCol {
650            uri: self.uri.clone(),
651            line: line_index.number(),
652            column: column_index.number(),
653        })
654    }
655
656    /// Update the source document after being notified of a change event.
657    ///
658    /// The `version` indicates the new version of the document
659    ///
660    /// NOTE: This is intended to update a [super::SourceManager]'s view of the content of the
661    /// document, _not_ to perform an update against the actual file, wherever it may be.
662    pub fn update(
663        &mut self,
664        text: String,
665        range: Option<Selection>,
666        version: i32,
667    ) -> Result<(), SourceContentUpdateError> {
668        match range {
669            Some(range) => {
670                let start = self
671                    .line_column_to_offset(range.start.line, range.start.character)
672                    .ok_or(SourceContentUpdateError::InvalidSelectionStart(range.start))?
673                    .to_usize();
674                let end = self
675                    .line_column_to_offset(range.end.line, range.end.character)
676                    .ok_or(SourceContentUpdateError::InvalidSelectionEnd(range.end))?
677                    .to_usize();
678                assert!(start <= end, "start of range must be less than end, got {start}..{end}",);
679                self.content.replace_range(start..end, &text);
680
681                let added_line_starts = compute_line_starts(&text, Some(start as u32));
682                let num_added = added_line_starts.len();
683                let splice_start = range.start.line.to_usize() + 1;
684                // Determine deletion range in line_starts to respect Selection semantics.
685                // For multi-line edits, remove line starts from (start.line + 1) up to end.line
686                // inclusive, since all intervening newlines are removed by the
687                // replacement, regardless of end.character.
688                enum Deletion {
689                    Empty,
690                    Inclusive(usize), // inclusive end index
691                }
692                let deletion = if range.start.line == range.end.line {
693                    Deletion::Empty
694                } else {
695                    let mut end_line_for_splice = range.end.line.to_usize();
696                    if !self.line_starts.is_empty() {
697                        let max_idx = self.line_starts.len() - 1;
698                        if end_line_for_splice > max_idx {
699                            end_line_for_splice = max_idx;
700                        }
701                    }
702                    if end_line_for_splice >= splice_start {
703                        Deletion::Inclusive(end_line_for_splice)
704                    } else {
705                        Deletion::Empty
706                    }
707                };
708
709                match deletion {
710                    Deletion::Empty => {
711                        self.line_starts.splice(splice_start..splice_start, added_line_starts);
712                    },
713                    Deletion::Inclusive(end_idx) => {
714                        self.line_starts.splice(splice_start..=end_idx, added_line_starts);
715                    },
716                }
717
718                let diff =
719                    (text.len() as i32).saturating_sub_unsigned((end as u32) - (start as u32));
720                if diff != 0 {
721                    for i in (splice_start + num_added)..self.line_starts.len() {
722                        self.line_starts[i] =
723                            ByteIndex(self.line_starts[i].to_u32().saturating_add_signed(diff));
724                    }
725                }
726            },
727            None => {
728                self.line_starts = compute_line_starts(&text, None);
729                self.content = text;
730            },
731        }
732
733        self.version = version;
734
735        Ok(())
736    }
737}
738
#[cfg(feature = "serde")]
impl SourceContent {
    /// Deserialize a [SourceContent], then rebuild its line table from the deserialized text.
    ///
    /// The line table is not part of the serialized form, so it would otherwise be left empty.
    fn deserialize_and_recompute_line_starts<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        SourceContent::deserialize(deserializer).map(|mut deserialized| {
            deserialized.line_starts = compute_line_starts(&deserialized.content, None);
            deserialized
        })
    }
}
750
751fn compute_line_starts(text: &str, text_offset: Option<u32>) -> Vec<ByteIndex> {
752    let bytes = text.as_bytes();
753    let initial_line_offset = match text_offset {
754        Some(_) => None,
755        None => Some(ByteIndex(0)),
756    };
757    let text_offset = text_offset.unwrap_or(0);
758    initial_line_offset
759        .into_iter()
760        .chain(
761            memchr::memchr_iter(b'\n', bytes)
762                .map(|offset| ByteIndex(text_offset + (offset + 1) as u32)),
763        )
764        .collect()
765}
766
767// SOURCE CONTENT INDICES
768// ================================================================================================
769
/// A position in a source file, expressed as an offset in bytes from the start of the file
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
pub struct ByteIndex(pub u32);

impl ByteIndex {
    /// Wrap a raw `u32` index as a [ByteIndex]
    pub const fn new(index: u32) -> Self {
        ByteIndex(index)
    }

    /// The raw index, widened to a `usize`
    #[inline(always)]
    pub const fn to_usize(self) -> usize {
        let Self(raw) = self;
        raw as usize
    }

    /// The raw index, as a `u32`
    #[inline(always)]
    pub const fn to_u32(self) -> u32 {
        let Self(raw) = self;
        raw
    }
}
794
795impl core::ops::Add<ByteOffset> for ByteIndex {
796    type Output = ByteIndex;
797
798    fn add(self, rhs: ByteOffset) -> Self {
799        Self((self.0 as i64 + rhs.0) as u32)
800    }
801}
802
803impl core::ops::Add<u32> for ByteIndex {
804    type Output = ByteIndex;
805
806    fn add(self, rhs: u32) -> Self {
807        Self(self.0 + rhs)
808    }
809}
810
811impl core::ops::AddAssign<ByteOffset> for ByteIndex {
812    fn add_assign(&mut self, rhs: ByteOffset) {
813        *self = *self + rhs;
814    }
815}
816
817impl core::ops::AddAssign<u32> for ByteIndex {
818    fn add_assign(&mut self, rhs: u32) {
819        self.0 += rhs;
820    }
821}
822
823impl core::ops::Sub<ByteOffset> for ByteIndex {
824    type Output = ByteIndex;
825
826    fn sub(self, rhs: ByteOffset) -> Self {
827        Self((self.0 as i64 - rhs.0) as u32)
828    }
829}
830
831impl core::ops::Sub<u32> for ByteIndex {
832    type Output = ByteIndex;
833
834    fn sub(self, rhs: u32) -> Self {
835        Self(self.0 - rhs)
836    }
837}
838
839impl core::ops::SubAssign<ByteOffset> for ByteIndex {
840    fn sub_assign(&mut self, rhs: ByteOffset) {
841        *self = *self - rhs;
842    }
843}
844
845impl core::ops::SubAssign<u32> for ByteIndex {
846    fn sub_assign(&mut self, rhs: u32) {
847        self.0 -= rhs;
848    }
849}
850
851impl From<u32> for ByteIndex {
852    fn from(index: u32) -> Self {
853        Self(index)
854    }
855}
856
857impl From<ByteIndex> for u32 {
858    fn from(index: ByteIndex) -> Self {
859        index.0
860    }
861}
862
863impl fmt::Display for ByteIndex {
864    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
865        fmt::Display::fmt(&self.0, f)
866    }
867}
868
/// A signed distance in bytes, relative to some [ByteIndex]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(i64);

impl ByteOffset {
    /// The offset spanned by the UTF-8 encoding of `c`
    pub fn from_char_len(c: char) -> ByteOffset {
        let width = c.len_utf8();
        ByteOffset(width as i64)
    }

    /// The offset spanned by the bytes of `s`
    pub fn from_str_len(s: &str) -> ByteOffset {
        let width = s.len();
        ByteOffset(width as i64)
    }
}

impl core::ops::Add for ByteOffset {
    type Output = ByteOffset;

    /// The sum of two offsets
    fn add(self, rhs: Self) -> Self {
        let (Self(lhs), Self(rhs)) = (self, rhs);
        Self(lhs + rhs)
    }
}

impl core::ops::AddAssign for ByteOffset {
    /// In-place version of adding two offsets
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for ByteOffset {
    type Output = ByteOffset;

    /// The difference of two offsets
    fn sub(self, rhs: Self) -> Self {
        let (Self(lhs), Self(rhs)) = (self, rhs);
        Self(lhs - rhs)
    }
}

impl core::ops::SubAssign for ByteOffset {
    /// In-place version of subtracting two offsets
    fn sub_assign(&mut self, rhs: Self) {
        *self = *self - rhs;
    }
}

impl fmt::Display for ByteOffset {
    /// Displays the raw offset, honoring any formatting flags
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(raw) = self;
        fmt::Display::fmt(raw, f)
    }
}
918
/// Declares a pair of newtypes describing the same position in two conventions: a zero-indexed
/// "index" type wrapping a `u32`, and a one-indexed "number" type wrapping a `NonZeroU32`,
/// together with conversions between them and checked/saturating/operator arithmetic.
///
/// Two forms are accepted:
///
/// * `(Name, "description")` derives the type names `NameIndex` and `NameNumber`.
/// * `(IndexName, NumberName, "description")` uses the given type names verbatim.
///
/// `description` is only used to build the generated doc comments.
macro_rules! declare_dual_number_and_index_type {
    // Shorthand form: derive both type names from a common prefix.
    ($name:ident, $description:literal) => {
        paste::paste! {
            declare_dual_number_and_index_type!([<$name Index>], [<$name Number>], $description);
        }
    };

    // Full form: both type names are given explicitly.
    ($index_name:ident, $number_name:ident, $description:literal) => {
        #[doc = concat!("A zero-indexed ", $description, " number")]
        #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $index_name(pub u32);

        impl $index_name {
            #[doc = concat!("Convert to a [", stringify!($number_name), "]")]
            ///
            /// # Panics
            ///
            /// Panics if this index is `u32::MAX`, as it has no one-indexed counterpart.
            pub const fn number(self) -> $number_name {
                // `wrapping_add(1)` produces zero only for `u32::MAX`. Going through the checked
                // `NonZeroU32::new` turns that case into a panic in every build profile, rather
                // than undefined behavior via `new_unchecked(0)` when overflow checks are off.
                match NonZeroU32::new(self.0.wrapping_add(1)) {
                    Some(number) => $number_name(number),
                    None => panic!("invalid index: overflow occurred"),
                }
            }

            /// Get the raw index value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0 as usize
            }

            /// Get the raw index value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0
            }

            /// Add `offset` to this index, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this index, returning `None` on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.checked_add_signed(offset).map(Self)
            }

            /// Subtract `offset` from this index, returning `None` on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.checked_sub(offset).map(Self)
            }

            /// Add `offset` to this index, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this index, saturating to `0` on underflow, and `u32::MAX`
            /// on overflow.
            pub const fn saturating_add_signed(self, offset: i32) -> Self {
                Self(self.0.saturating_add_signed(offset))
            }

            /// Subtract `offset` from this index, saturating to `0` on underflow
            pub const fn saturating_sub(self, offset: u32) -> Self {
                Self(self.0.saturating_sub(offset))
            }
        }

        impl From<u32> for $index_name {
            #[inline]
            fn from(index: u32) -> Self {
                Self(index)
            }
        }

        impl From<$number_name> for $index_name {
            #[inline]
            fn from(index: $number_name) -> Self {
                // A number is always >= 1, so the matching index always exists.
                index.to_index()
            }
        }

        impl core::ops::Add<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn add(self, rhs: u32) -> Self {
                Self(self.0 + rhs)
            }
        }

        impl core::ops::AddAssign<u32> for $index_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $index_name {
            type Output = Self;

            fn add(self, rhs: i32) -> Self {
                self.checked_add_signed(rhs).expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $index_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: u32) -> Self {
                Self(self.0 - rhs)
            }
        }

        impl core::ops::SubAssign<u32> for $index_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $index_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }

        #[doc = concat!("A one-indexed ", $description, " number")]
        #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
        #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
        #[cfg_attr(feature = "serde", serde(transparent))]
        pub struct $number_name(NonZeroU32);

        impl Default for $number_name {
            /// The default number is `1`, i.e. the first position.
            fn default() -> Self {
                // SAFETY: `1` is non-zero.
                Self(unsafe { NonZeroU32::new_unchecked(1) })
            }
        }

        impl $number_name {
            /// Create a new number from its raw value, returning `None` if `number` is zero
            pub const fn new(number: u32) -> Option<Self> {
                match NonZeroU32::new(number) {
                    Some(num) => Some(Self(num)),
                    None => None,
                }
            }

            #[doc = concat!("Convert to a [", stringify!($index_name), "]")]
            pub const fn to_index(self) -> $index_name {
                $index_name(self.to_u32().saturating_sub(1))
            }

            /// Get the raw value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0.get() as usize
            }

            /// Get the raw value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0.get()
            }

            /// Add `offset` to this number, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this number, returning `None` on overflow, or if the
            /// result would be less than `1`
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.get().checked_add_signed(offset).and_then(Self::new)
            }

            /// Subtract `offset` from this number, returning `None` if the result would be less
            /// than `1`
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.get().checked_sub(offset).and_then(Self::new)
            }

            /// Add `offset` to this number, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                // The saturating sum of a non-zero value and an unsigned offset is never zero,
                // so `NonZeroU32` can do this without any `unsafe`.
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this number, saturating to `1` on underflow, and
            /// `u32::MAX` on overflow.
            pub fn saturating_add_signed(self, offset: i32) -> Self {
                Self::new(self.to_u32().saturating_add_signed(offset)).unwrap_or_default()
            }

            /// Subtract `offset` from this number, saturating to `1` on underflow
            pub fn saturating_sub(self, offset: u32) -> Self {
                Self::new(self.to_u32().saturating_sub(offset)).unwrap_or_default()
            }
        }

        impl From<NonZeroU32> for $number_name {
            #[inline]
            fn from(index: NonZeroU32) -> Self {
                Self(index)
            }
        }

        impl From<$index_name> for $number_name {
            /// # Panics
            ///
            /// Panics if `index` is `u32::MAX`, as it has no one-indexed counterpart.
            #[inline]
            fn from(index: $index_name) -> Self {
                index.number()
            }
        }

        impl core::ops::Add<u32> for $number_name {
            type Output = Self;

            /// # Panics
            ///
            /// Panics if the sum does not fit in a `u32`.
            #[inline]
            fn add(self, rhs: u32) -> Self {
                // A plain `+` followed by `new_unchecked` would wrap to zero (undefined behavior)
                // when overflow checks are disabled; the checked form always panics instead.
                Self(self.0.checked_add(rhs).expect("invalid offset: overflow occurred"))
            }
        }

        impl core::ops::AddAssign<u32> for $number_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $number_name {
            type Output = Self;

            fn add(self, rhs: i32) -> Self {
                self.to_u32()
                    .checked_add_signed(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $number_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $number_name {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.to_u32()
                    .checked_sub(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $number_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $number_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
1193
// Generate the `LineIndex`/`LineNumber` and `ColumnIndex`/`ColumnNumber` type pairs.
declare_dual_number_and_index_type!(Line, "line");
declare_dual_number_and_index_type!(Column, "column");
1196
#[cfg(test)]
mod tests {
    use super::*;

    // The small MASM program shared by the line-start tests below.
    const PROGRAM: &str = "\
begin
  push.1
  push.2
  add
end
";

    // Asserts that the raw bytes of line `$line` of `$content` are exactly those of `$expected`.
    macro_rules! assert_line_bytes {
        ($content:expr, $line:expr, $expected:expr) => {
            assert_eq!(
                $content
                    .byte_slice($content.line_range($line).expect("invalid line"))
                    .expect("invalid byte range"),
                $expected.as_bytes()
            )
        };
    }

    #[test]
    fn source_content_line_starts() {
        let content = SourceContent::new("masm", "foo.masm", PROGRAM);

        // Five newline-terminated lines, plus the empty line following the final newline
        assert_eq!(content.line_count(), 6);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(1), "  push.1\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }

    #[test]
    fn source_content_line_starts_after_update() {
        const FRAGMENT: &str = "  push.2
  mul
end
";
        let mut content = SourceContent::new("masm", "foo.masm", PROGRAM);
        let selection = Selection::from(LineIndex(4)..LineIndex(5));
        content.update(FRAGMENT.to_string(), Some(selection), 1).expect("update failed");

        assert_eq!(
            content.as_str(),
            "\
begin
  push.1
  push.2
  add
  push.2
  mul
end
"
        );
        assert_eq!(content.line_count(), 8);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(3), "  add\n");
        assert_line_bytes!(content, LineIndex(4), "  push.2\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }

    /// A backslash immediately before a newline must NOT be treated as a line continuation.
    #[test]
    fn source_content_line_starts_with_trailing_backslash() {
        const CONTENT: &str =
            "//! Build with:\n//!   cargo build \\\n//!     --release\nfn main() {}\n";
        // The four newline-terminated lines of `CONTENT`, in order; the fifth line is the empty
        // line following the final newline.
        const LINES: [&str; 4] = [
            "//! Build with:\n",
            "//!   cargo build \\\n",
            "//!     --release\n",
            "fn main() {}\n",
        ];

        let content = SourceContent::new("rust", "example.rs", CONTENT);
        assert_eq!(content.line_count(), 5);

        // Each line must come back intact, including the one that ends in a backslash
        for (line, expected) in (0u32..).zip(LINES) {
            assert_line_bytes!(content, LineIndex(line), expected);
        }

        // `line_column_to_offset` must resolve every line, including those that follow a
        // backslash-terminated line.
        let [start0, start1, start2, start3] =
            [0, 1, 2, 3].map(|line| content.line_column_to_offset(LineIndex(line), ColumnIndex(0)));

        assert!(start0.is_some(), "line 0 should be accessible");
        assert!(start1.is_some(), "line 1 should be accessible");
        assert!(start2.is_some(), "line 2 should be accessible");
        assert!(start3.is_some(), "line 3 should be accessible");

        // Every line starts right after the bytes of the lines preceding it
        assert_eq!(start0.unwrap().to_u32(), 0);
        assert_eq!(start1.unwrap().to_u32(), 16); // past "//! Build with:\n"
        assert_eq!(start2.unwrap().to_u32(), 36); // past "//!   cargo build \\\n"
        assert_eq!(start3.unwrap().to_u32(), 54); // past "//!     --release\n"
    }

    /// Several consecutive backslash-terminated lines are still split on every newline.
    #[test]
    fn source_content_line_starts_multiple_trailing_backslashes() {
        const CONTENT: &str = "line1 \\\nline2 \\\nline3 \\\nline4\n";
        const LINES: [&str; 4] = ["line1 \\\n", "line2 \\\n", "line3 \\\n", "line4\n"];

        let content = SourceContent::new("text", "test.txt", CONTENT);

        // Four lines of content, plus the empty line following the final newline
        assert_eq!(content.line_count(), 5);

        for (line, expected) in (0u32..).zip(LINES) {
            assert_line_bytes!(content, LineIndex(line), expected);
        }
    }
}