Skip to main content

litcheck_core/diagnostics/
source_file.rs

1use core::{fmt, num::NonZeroU32};
2use std::sync::Arc;
3
4use serde::{Deserialize, Serialize};
5
6use super::{FileLineCol, FileName, Position, Selection, SourceId, SourceSpan};
7use crate::range::Range;
8
9// SOURCE LANGUAGE
10// ================================================================================================
11
/// The programming language a source file is written in
///
/// The default is [SourceLanguage::Unknown], which is used whenever the language cannot be
/// determined.
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub enum SourceLanguage {
    /// The language is not known
    #[default]
    Unknown,
    /// C source or header
    C,
    /// C++ source or header
    Cpp,
    /// Python source
    Python,
    /// Rust source
    Rust,
    /// Any other language, identified by the given name
    Other(&'static str),
}

impl AsRef<str> for SourceLanguage {
    /// Returns the textual identifier of this language.
    ///
    /// [SourceLanguage::Unknown] is represented by the empty string, and
    /// [SourceLanguage::Other] by the name it carries.
    fn as_ref(&self) -> &str {
        match self {
            Self::Unknown => "",
            Self::C => "c",
            Self::Cpp => "c++",
            Self::Python => "python",
            Self::Rust => "rust",
            Self::Other(other) => other,
        }
    }
}

impl SourceLanguage {
    /// Determine the language from a file extension (given without the leading `.`).
    ///
    /// Matching is case-sensitive. Extensions that are not recognized map to
    /// [SourceLanguage::Unknown].
    pub fn from_extension(extension: &str) -> Self {
        match extension {
            "c" | "h" => Self::C,
            // Besides `.cpp`/`.hpp`, C++ sources are commonly named `.cc`/`.cxx`, with
            // `.hh`/`.hxx` headers
            "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Self::Cpp,
            "rs" => Self::Rust,
            "py" => Self::Python,
            _ => Self::Unknown,
        }
    }

    /// Determine the language from the extension of `path`.
    ///
    /// Returns [SourceLanguage::Unknown] if the path has no extension, the extension is not
    /// valid UTF-8, or the extension is not recognized.
    pub fn from_path(path: &std::path::Path) -> Self {
        path.extension()
            .and_then(|ext| ext.to_str())
            .map(Self::from_extension)
            .unwrap_or_default()
    }
}
54
55// SOURCE FILE
56// ================================================================================================
57
58/// A [SourceFile] represents a single file stored in a [super::SourceManager]
59#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
60pub struct SourceFile {
61    /// The unique identifier allocated for this [SourceFile] by its owning [super::SourceManager]
62    id: SourceId,
63    /// The file content
64    content: SourceContent,
65}
66
67impl miette::SourceCode for SourceFile {
68    fn read_span<'a>(
69        &'a self,
70        span: &miette::SourceSpan,
71        context_lines_before: usize,
72        context_lines_after: usize,
73    ) -> Result<Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
74        let mut start =
75            u32::try_from(span.offset()).map_err(|_| miette::MietteError::OutOfBounds)?;
76        let len = u32::try_from(span.len()).map_err(|_| miette::MietteError::OutOfBounds)?;
77        let mut end = start
78            .checked_add(len)
79            .ok_or(miette::MietteError::OutOfBounds)?;
80        if context_lines_before > 0 {
81            let line_index = self.content.line_index(start.into());
82            let start_line_index = line_index.saturating_sub(context_lines_before as u32);
83            start = self
84                .content
85                .line_start(start_line_index)
86                .map(|idx| idx.to_u32())
87                .unwrap_or(0);
88        }
89        if context_lines_after > 0 {
90            let line_index = self.content.line_index(end.into());
91            let end_line_index = line_index
92                .checked_add(context_lines_after as u32)
93                .ok_or(miette::MietteError::OutOfBounds)?;
94            end = self
95                .content
96                .line_range(end_line_index)
97                .map(|range| range.end.to_u32())
98                .unwrap_or_else(|| self.content.source_range().end.to_u32());
99        }
100        Ok(Box::new(ScopedSourceFileRef {
101            file: self,
102            span: miette::SourceSpan::new((start as usize).into(), end.abs_diff(start) as usize),
103        }))
104    }
105}
106
107impl SourceFile {
108    /// Create a new [SourceFile] from its raw components
109    pub fn new(
110        id: SourceId,
111        lang: SourceLanguage,
112        uri: FileName,
113        content: impl Into<Box<str>>,
114    ) -> Self {
115        let content = SourceContent::new(lang, uri, content.into());
116        Self { id, content }
117    }
118
119    /// This function is intended for use by [super::SourceManager] implementations that need to
120    /// construct a [SourceFile] from its raw components (i.e. the identifier for the source file
121    /// and its content).
122    ///
123    /// Since the only entity that should be constructing a [SourceId] is a [super::SourceManager],
124    /// it is only valid to call this function in one of two scenarios:
125    ///
126    /// 1. You are a [super::SourceManager] constructing a [SourceFile] after allocating a
127    ///    [SourceId]
128    /// 2. You pass [`SourceId::default()`], i.e. [`SourceId::UNKNOWN`] for the source identifier.
129    ///    The resulting [SourceFile] will be valid and safe to use in a context where there isn't a
130    ///    [super::SourceManager] present. If there is a source manager in use, then constructing
131    ///    detached [SourceFile]s is _not_ recommended, because it will make it confusing to
132    ///    determine whether a given [SourceFile] reference is safe to use.
133    ///
134    /// You should rarely, if ever, fall in camp 2 - but it can be handy in some narrow cases
135    pub fn from_raw_parts(id: SourceId, content: SourceContent) -> Self {
136        Self { id, content }
137    }
138
139    /// Get the [SourceId] associated with this file
140    pub const fn id(&self) -> SourceId {
141        self.id
142    }
143
144    /// Get the name of this source file
145    pub fn uri(&self) -> &FileName {
146        self.content.uri()
147    }
148
149    /// Returns a reference to the underlying [SourceContent]
150    pub fn content(&self) -> &SourceContent {
151        &self.content
152    }
153
154    /// Returns a mutable reference to the underlying [SourceContent]
155    pub fn content_mut(&mut self) -> &mut SourceContent {
156        &mut self.content
157    }
158
159    /// Returns the number of lines in this file
160    pub fn line_count(&self) -> usize {
161        self.content.line_starts.len()
162    }
163
164    /// Returns the number of bytes in this file
165    pub fn len(&self) -> usize {
166        self.content.len()
167    }
168
169    /// Returns true if this file is empty
170    pub fn is_empty(&self) -> bool {
171        self.content.is_empty()
172    }
173
174    /// Get the underlying content of this file
175    #[inline(always)]
176    pub fn as_str(&self) -> &str {
177        self.content.as_str()
178    }
179
180    /// Get the underlying content of this file as a byte slice
181    #[inline(always)]
182    pub fn as_bytes(&self) -> &[u8] {
183        self.content.as_bytes()
184    }
185
186    /// Returns a [SourceSpan] covering the entirety of this file
187    #[inline]
188    pub fn source_span(&self) -> SourceSpan {
189        let range = self.content.source_range();
190        SourceSpan::new(self.id, range)
191    }
192
193    /// Returns a subset of the underlying content as a string slice.
194    ///
195    /// The bounds of the given span are byte indices, _not_ character indices.
196    ///
197    /// Returns `None` if the given span is out of bounds, or if the bounds do not
198    /// fall on valid UTF-8 character boundaries.
199    #[inline(always)]
200    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
201        self.content.source_slice(span)
202    }
203
204    /// Returns a [SourceFileRef] corresponding to the bytes contained in the specified span.
205    pub fn slice(self: &Arc<Self>, span: impl Into<Range<u32>>) -> SourceFileRef {
206        SourceFileRef::new(Arc::clone(self), span)
207    }
208
209    /// Get a [SourceSpan] which points to the first byte of the character at `column` on `line`
210    ///
211    /// Returns `None` if the given line/column is out of bounds for this file.
212    pub fn line_column_to_span(
213        &self,
214        line: LineNumber,
215        column: ColumnNumber,
216    ) -> Option<SourceSpan> {
217        let offset = self
218            .content
219            .line_column_to_offset(line.into(), column.into())?;
220        Some(SourceSpan::at(self.id, offset.0))
221    }
222
223    /// Get a [FileLineCol] equivalent to the start of the given [SourceSpan]
224    pub fn location(&self, span: SourceSpan) -> FileLineCol {
225        assert_eq!(span.source_id(), self.id, "mismatched source ids");
226
227        self.content
228            .location(ByteIndex(span.into_range().start))
229            .expect("invalid source span: starting byte is out of bounds")
230    }
231}
232
233impl AsRef<str> for SourceFile {
234    #[inline(always)]
235    fn as_ref(&self) -> &str {
236        self.as_str()
237    }
238}
239
240impl AsRef<[u8]> for SourceFile {
241    #[inline(always)]
242    fn as_ref(&self) -> &[u8] {
243        self.as_bytes()
244    }
245}
246
247// SOURCE FILE REF
248// ================================================================================================
249
250/// A reference to a specific spanned region of a [SourceFile], that provides access to the actual
251/// [SourceFile], but scoped to the span it was created with.
252///
253/// This is useful in error types that implement [miette::Diagnostic], as it contains all of the
254/// data necessary to render the source code being referenced, without a [super::SourceManager] on
255/// hand.
256#[derive(Debug, Clone)]
257pub struct SourceFileRef {
258    file: Arc<SourceFile>,
259    span: SourceSpan,
260}
261
262impl SourceFileRef {
263    /// Create a [SourceFileRef] from a [SourceFile] and desired span (in bytes)
264    ///
265    /// The given span will be constrained to the bytes of `file`, so a span that reaches out of
266    /// bounds will have its end bound set to the last byte of the file.
267    pub fn new(file: Arc<SourceFile>, span: impl Into<Range<u32>>) -> Self {
268        let span = span.into();
269        let end = core::cmp::min(span.end, file.len() as u32);
270        let span = SourceSpan::new(file.id(), Range::new(span.start, end));
271        Self { file, span }
272    }
273
274    /// Returns a ref-counted handle to the underlying [SourceFile]
275    pub fn source_file(&self) -> Arc<SourceFile> {
276        self.file.clone()
277    }
278
279    /// Returns the URI of the file this [SourceFileRef] is selecting
280    pub fn uri(&self) -> &FileName {
281        self.file.uri()
282    }
283
284    /// Returns the [SourceSpan] selected by this [SourceFileRef]
285    pub const fn span(&self) -> SourceSpan {
286        self.span
287    }
288
289    /// Returns the underlying `str` selected by this [SourceFileRef]
290    pub fn as_str(&self) -> &str {
291        self.file.source_slice(self.span).unwrap()
292    }
293
294    /// Returns the underlying bytes selected by this [SourceFileRef]
295    #[inline]
296    pub fn as_bytes(&self) -> &[u8] {
297        self.as_str().as_bytes()
298    }
299
300    /// Returns the number of bytes represented by the subset of the underlying file that is covered
301    /// by this [SourceFileRef]
302    pub fn len(&self) -> usize {
303        self.span.len()
304    }
305
306    /// Returns true if this selection is empty
307    pub fn is_empty(&self) -> bool {
308        self.len() == 0
309    }
310}
311
312impl Eq for SourceFileRef {}
313
314impl PartialEq for SourceFileRef {
315    fn eq(&self, other: &Self) -> bool {
316        self.as_str() == other.as_str()
317    }
318}
319
320impl Ord for SourceFileRef {
321    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
322        self.as_str().cmp(other.as_str())
323    }
324}
325
326impl PartialOrd for SourceFileRef {
327    #[inline(always)]
328    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
329        Some(self.cmp(other))
330    }
331}
332
333impl core::hash::Hash for SourceFileRef {
334    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
335        self.as_str().hash(state);
336    }
337}
338
339impl AsRef<str> for SourceFileRef {
340    #[inline(always)]
341    fn as_ref(&self) -> &str {
342        self.as_str()
343    }
344}
345
346impl AsRef<[u8]> for SourceFileRef {
347    #[inline(always)]
348    fn as_ref(&self) -> &[u8] {
349        self.as_bytes()
350    }
351}
352
353impl From<&SourceFileRef> for miette::SourceSpan {
354    fn from(source: &SourceFileRef) -> Self {
355        source.span.into()
356    }
357}
358
359/// Used to implement [miette::SpanContents] for [SourceFile] and [SourceFileRef]
360struct ScopedSourceFileRef<'a> {
361    file: &'a SourceFile,
362    span: miette::SourceSpan,
363}
364
365impl<'a> miette::SpanContents<'a> for ScopedSourceFileRef<'a> {
366    #[inline]
367    fn data(&self) -> &'a [u8] {
368        let start = self.span.offset();
369        let end = start + self.span.len();
370        &self.file.as_bytes()[start..end]
371    }
372
373    #[inline]
374    fn span(&self) -> &miette::SourceSpan {
375        &self.span
376    }
377
378    fn line(&self) -> usize {
379        let offset = self.span.offset() as u32;
380        self.file.content.line_index(offset.into()).to_usize()
381    }
382
383    fn column(&self) -> usize {
384        let start = self.span.offset() as u32;
385        let end = start + self.span.len() as u32;
386        let span = SourceSpan::new(self.file.id(), Range::new(start, end));
387        let loc = self.file.location(span);
388        loc.column.to_index().to_usize()
389    }
390
391    #[inline]
392    fn line_count(&self) -> usize {
393        self.file.line_count()
394    }
395
396    #[inline]
397    fn name(&self) -> Option<&str> {
398        Some(self.file.uri().as_str())
399    }
400
401    #[inline]
402    fn language(&self) -> Option<&str> {
403        None
404    }
405}
406
407impl miette::SourceCode for SourceFileRef {
408    #[inline]
409    fn read_span<'a>(
410        &'a self,
411        span: &miette::SourceSpan,
412        context_lines_before: usize,
413        context_lines_after: usize,
414    ) -> Result<Box<dyn miette::SpanContents<'a> + 'a>, miette::MietteError> {
415        self.file
416            .read_span(span, context_lines_before, context_lines_after)
417    }
418}
419
420// SOURCE CONTENT
421// ================================================================================================
422
423/// Represents key information about a source file and its content:
424///
425/// * The path to the file (or its name, in the case of virtual files)
426/// * The content of the file
427/// * The byte offsets of every line in the file, for use in looking up line/column information
428#[derive(Clone)]
429pub struct SourceContent {
430    /// The language identifier for this source file
431    language: Box<str>,
432    /// The path (or name) of this file
433    uri: FileName,
434    /// The underlying content of this file
435    content: String,
436    /// The byte offsets for each line in this file
437    line_starts: Vec<ByteIndex>,
438    /// The document version
439    version: i32,
440}
441
442impl fmt::Debug for SourceContent {
443    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
444        let Self {
445            language,
446            uri,
447            content,
448            line_starts,
449            version,
450        } = self;
451        f.debug_struct("SourceContent")
452            .field("version", version)
453            .field("language", language)
454            .field("uri", uri)
455            .field("size_in_bytes", &content.len())
456            .field("line_count", &line_starts.len())
457            .field("content", content)
458            .finish()
459    }
460}
461
462impl Eq for SourceContent {}
463
464impl PartialEq for SourceContent {
465    #[inline]
466    fn eq(&self, other: &Self) -> bool {
467        self.language == other.language && self.uri == other.uri && self.content == other.content
468    }
469}
470
471impl Ord for SourceContent {
472    #[inline]
473    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
474        self.uri
475            .cmp(&other.uri)
476            .then_with(|| self.content.cmp(&other.content))
477    }
478}
479
480impl PartialOrd for SourceContent {
481    #[inline]
482    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
483        Some(self.cmp(other))
484    }
485}
486
487impl core::hash::Hash for SourceContent {
488    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
489        self.language.hash(state);
490        self.uri.hash(state);
491        self.content.hash(state);
492    }
493}
494
495#[derive(Debug, thiserror::Error)]
496pub enum SourceContentUpdateError {
497    #[error("invalid content selection: start position of {}:{} is out of bounds", .0.line, .0.character)]
498    InvalidSelectionStart(Position),
499    #[error("invalid content selection: end position of {}:{} is out of bounds", .0.line, .0.character)]
500    InvalidSelectionEnd(Position),
501}
502
503impl SourceContent {
504    /// Create a new [SourceContent] from the (possibly virtual) file path, and its content as a
505    /// UTF-8 string.
506    ///
507    /// When created, the line starts for this file will be computed, which requires scanning the
508    /// file content once.
509    pub fn new(
510        language: impl AsRef<str>,
511        uri: impl Into<FileName>,
512        content: impl Into<String>,
513    ) -> Self {
514        let language = language.as_ref().to_string().into_boxed_str();
515        let content: String = content.into();
516        let bytes = content.as_bytes();
517
518        assert!(
519            bytes.len() < u32::MAX as usize,
520            "unsupported source file: current maximum supported length in bytes is 2^32"
521        );
522
523        let line_starts = compute_line_starts(&content, None);
524
525        Self {
526            language,
527            uri: uri.into(),
528            content,
529            line_starts,
530            version: 0,
531        }
532    }
533
534    /// Get the language identifier of this source file
535    pub fn language(&self) -> &str {
536        &self.language
537    }
538
539    /// Get the current version of this source file's content
540    pub fn version(&self) -> i32 {
541        self.version
542    }
543
544    /// Set the current version of this content
545    #[inline(always)]
546    pub fn set_version(&mut self, version: i32) {
547        self.version = version;
548    }
549
550    /// Get the URI of this source file
551    #[inline]
552    pub fn uri(&self) -> &FileName {
553        &self.uri
554    }
555
556    /// Returns the underlying content as a string slice
557    #[inline(always)]
558    pub fn as_str(&self) -> &str {
559        self.content.as_ref()
560    }
561
562    /// Returns the underlying content as a byte slice
563    #[inline(always)]
564    pub fn as_bytes(&self) -> &[u8] {
565        self.content.as_bytes()
566    }
567
568    /// Returns the size in bytes of the underlying content
569    #[inline(always)]
570    pub fn len(&self) -> usize {
571        self.content.len()
572    }
573
574    /// Returns true if the underlying content is empty
575    #[inline(always)]
576    pub fn is_empty(&self) -> bool {
577        self.content.is_empty()
578    }
579
580    /// Returns the range of valid byte indices for this file
581    #[inline]
582    pub fn source_range(&self) -> Range<ByteIndex> {
583        Range::new(ByteIndex::new(0), ByteIndex::new(self.content.len() as u32))
584    }
585
586    /// Returns a subset of the underlying content as a string slice.
587    ///
588    /// The bounds of the given span are byte indices, _not_ character indices.
589    ///
590    /// Returns `None` if the given span is out of bounds, or if the bounds do not
591    /// fall on valid UTF-8 character boundaries.
592    #[inline(always)]
593    pub fn source_slice(&self, span: impl Into<Range<usize>>) -> Option<&str> {
594        let span = span.into();
595        self.as_str().get(span.into_range())
596    }
597
598    /// Returns a subset of the underlying content as a byte slice.
599    ///
600    /// Returns `None` if the given span is out of bounds
601    #[inline(always)]
602    pub fn byte_slice(&self, span: impl Into<Range<ByteIndex>>) -> Option<&[u8]> {
603        let Range { start, end } = span.into();
604        self.as_bytes().get(start.to_usize()..end.to_usize())
605    }
606
607    /// Like [Self::source_slice], but the slice is computed like a selection in an editor, i.e.
608    /// based on line/column positions, rather than raw character indices.
609    ///
610    /// This is useful when mapping LSP operations to content in the source file.
611    pub fn select(&self, mut range: Selection) -> Option<&str> {
612        range.canonicalize();
613
614        let start = self.line_column_to_offset(range.start.line, range.start.character)?;
615        let end = self.line_column_to_offset(range.end.line, range.end.character)?;
616
617        Some(&self.as_str()[start.to_usize()..end.to_usize()])
618    }
619
620    /// Returns the number of lines in the source content
621    pub fn line_count(&self) -> usize {
622        self.line_starts.len()
623    }
624
625    /// Returns the byte index at which the line corresponding to `line_index` starts
626    ///
627    /// Returns `None` if the given index is out of bounds
628    pub fn line_start(&self, line_index: LineIndex) -> Option<ByteIndex> {
629        self.line_starts.get(line_index.to_usize()).copied()
630    }
631
632    /// Returns the index of the last line in this file
633    pub fn last_line_index(&self) -> LineIndex {
634        LineIndex(
635            self.line_count()
636                .saturating_sub(1)
637                .try_into()
638                .expect("too many lines in file"),
639        )
640    }
641
642    /// Get the range of byte indices covered by the given line
643    pub fn line_range(&self, line_index: LineIndex) -> Option<Range<ByteIndex>> {
644        let line_start = self.line_start(line_index)?;
645        match self.line_start(line_index + 1) {
646            Some(line_end) => Some(Range::new(line_start, line_end)),
647            None => Some(Range::new(line_start, ByteIndex(self.content.len() as u32))),
648        }
649    }
650
651    /// Get the index of the line to which `byte_index` belongs
652    pub fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
653        match self.line_starts.binary_search(&byte_index) {
654            Ok(line) => LineIndex(line as u32),
655            Err(next_line) => LineIndex(next_line as u32 - 1),
656        }
657    }
658
659    /// Get the [ByteIndex] corresponding to the given line and column indices.
660    ///
661    /// Returns `None` if the line or column indices are out of bounds.
662    pub fn line_column_to_offset(
663        &self,
664        line_index: LineIndex,
665        column_index: ColumnIndex,
666    ) -> Option<ByteIndex> {
667        let column_index = column_index.to_usize();
668        let line_span = self.line_range(line_index)?;
669        let line_src = self
670            .content
671            .get(line_span.start.to_usize()..line_span.end.to_usize())
672            .expect("invalid line boundaries: invalid utf-8");
673        if line_src.len() < column_index {
674            return None;
675        }
676        let (pre, _) = line_src.split_at(column_index);
677        let start = line_span.start;
678        Some(start + ByteOffset::from_str_len(pre))
679    }
680
681    /// Get a [FileLineCol] corresponding to the line/column in this file at which `byte_index`
682    /// occurs
683    pub fn location(&self, byte_index: ByteIndex) -> Option<FileLineCol> {
684        let line_index = self.line_index(byte_index);
685        let line_start_index = self.line_start(line_index)?;
686        let line_src = self
687            .content
688            .get(line_start_index.to_usize()..byte_index.to_usize())?;
689        let column_index = ColumnIndex::from(line_src.chars().count() as u32);
690        Some(FileLineCol {
691            uri: self.uri.clone(),
692            line: line_index.number(),
693            column: column_index.number(),
694        })
695    }
696
697    /// Update the source document after being notified of a change event.
698    ///
699    /// The `version` indicates the new version of the document
700    ///
701    /// NOTE: This is intended to update a [super::SourceManager]'s view of the content of the
702    /// document, _not_ to perform an update against the actual file, wherever it may be.
703    pub fn update(
704        &mut self,
705        text: String,
706        range: Option<Selection>,
707        version: i32,
708    ) -> Result<(), SourceContentUpdateError> {
709        match range {
710            Some(range) => {
711                let start = self
712                    .line_column_to_offset(range.start.line, range.start.character)
713                    .ok_or(SourceContentUpdateError::InvalidSelectionStart(range.start))?
714                    .to_usize();
715                let end = self
716                    .line_column_to_offset(range.end.line, range.end.character)
717                    .ok_or(SourceContentUpdateError::InvalidSelectionEnd(range.end))?
718                    .to_usize();
719                assert!(
720                    start <= end,
721                    "start of range must be less than end, got {start}..{end}",
722                );
723                self.content.replace_range(start..end, &text);
724
725                let added_line_starts = compute_line_starts(&text, Some(start as u32));
726                let num_added = added_line_starts.len();
727                let splice_start = range.start.line.to_usize() + 1;
728                // Determine deletion range in line_starts to respect Selection semantics.
729                // For multi-line edits, remove line starts from (start.line + 1) up to end.line
730                // inclusive, since all intervening newlines are removed by the
731                // replacement, regardless of end.character.
732                enum Deletion {
733                    Empty,
734                    Inclusive(usize), // inclusive end index
735                }
736                let deletion = if range.start.line == range.end.line {
737                    Deletion::Empty
738                } else {
739                    let mut end_line_for_splice = range.end.line.to_usize();
740                    if !self.line_starts.is_empty() {
741                        let max_idx = self.line_starts.len() - 1;
742                        if end_line_for_splice > max_idx {
743                            end_line_for_splice = max_idx;
744                        }
745                    }
746                    if end_line_for_splice >= splice_start {
747                        Deletion::Inclusive(end_line_for_splice)
748                    } else {
749                        Deletion::Empty
750                    }
751                };
752
753                match deletion {
754                    Deletion::Empty => {
755                        self.line_starts
756                            .splice(splice_start..splice_start, added_line_starts);
757                    }
758                    Deletion::Inclusive(end_idx) => {
759                        self.line_starts
760                            .splice(splice_start..=end_idx, added_line_starts);
761                    }
762                }
763
764                let diff =
765                    (text.len() as i32).saturating_sub_unsigned((end as u32) - (start as u32));
766                if diff != 0 {
767                    for i in (splice_start + num_added)..self.line_starts.len() {
768                        self.line_starts[i] =
769                            ByteIndex(self.line_starts[i].to_u32().saturating_add_signed(diff));
770                    }
771                }
772            }
773            None => {
774                self.line_starts = compute_line_starts(&text, None);
775                self.content = text;
776            }
777        }
778
779        self.version = version;
780
781        Ok(())
782    }
783}
784
785fn compute_line_starts(text: &str, text_offset: Option<u32>) -> Vec<ByteIndex> {
786    let bytes = text.as_bytes();
787    let initial_line_offset = match text_offset {
788        Some(_) => None,
789        None => Some(ByteIndex(0)),
790    };
791    let text_offset = text_offset.unwrap_or(0);
792    initial_line_offset
793        .into_iter()
794        .chain(
795            memchr::memchr_iter(b'\n', bytes)
796                .map(|offset| ByteIndex(text_offset + (offset + 1) as u32)),
797        )
798        .collect()
799}
800
801// SOURCE CONTENT INDICES
802// ================================================================================================
803
804/// An index representing the offset in bytes from the start of a source file
805#[derive(
806    Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
807)]
808#[serde(transparent)]
809pub struct ByteIndex(pub u32);
810
811impl ByteIndex {
812    /// Create a [ByteIndex] from a raw `u32` index
813    pub const fn new(index: u32) -> Self {
814        Self(index)
815    }
816
817    /// Get the raw index as a usize
818    #[inline(always)]
819    pub const fn to_usize(self) -> usize {
820        self.0 as usize
821    }
822
823    /// Get the raw index as a u32
824    #[inline(always)]
825    pub const fn to_u32(self) -> u32 {
826        self.0
827    }
828}
829
830impl core::ops::Add<ByteOffset> for ByteIndex {
831    type Output = ByteIndex;
832
833    fn add(self, rhs: ByteOffset) -> Self {
834        Self((self.0 as i64 + rhs.0) as u32)
835    }
836}
837
838impl core::ops::Add<u32> for ByteIndex {
839    type Output = ByteIndex;
840
841    fn add(self, rhs: u32) -> Self {
842        Self(self.0 + rhs)
843    }
844}
845
846impl core::ops::AddAssign<ByteOffset> for ByteIndex {
847    fn add_assign(&mut self, rhs: ByteOffset) {
848        *self = *self + rhs;
849    }
850}
851
852impl core::ops::AddAssign<u32> for ByteIndex {
853    fn add_assign(&mut self, rhs: u32) {
854        self.0 += rhs;
855    }
856}
857
858impl core::ops::Sub<ByteOffset> for ByteIndex {
859    type Output = ByteIndex;
860
861    fn sub(self, rhs: ByteOffset) -> Self {
862        Self((self.0 as i64 - rhs.0) as u32)
863    }
864}
865
866impl core::ops::Sub<u32> for ByteIndex {
867    type Output = ByteIndex;
868
869    fn sub(self, rhs: u32) -> Self {
870        Self(self.0 - rhs)
871    }
872}
873
874impl core::ops::SubAssign<ByteOffset> for ByteIndex {
875    fn sub_assign(&mut self, rhs: ByteOffset) {
876        *self = *self - rhs;
877    }
878}
879
880impl core::ops::SubAssign<u32> for ByteIndex {
881    fn sub_assign(&mut self, rhs: u32) {
882        self.0 -= rhs;
883    }
884}
885
886impl From<u32> for ByteIndex {
887    fn from(index: u32) -> Self {
888        Self(index)
889    }
890}
891
892impl From<ByteIndex> for u32 {
893    fn from(index: ByteIndex) -> Self {
894        index.0
895    }
896}
897
898impl fmt::Display for ByteIndex {
899    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
900        fmt::Display::fmt(&self.0, f)
901    }
902}
903
/// A signed distance in bytes, measured relative to some [ByteIndex]
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteOffset(i64);

impl ByteOffset {
    /// The offset spanned by the UTF-8 encoding of the given `char`
    pub fn from_char_len(c: char) -> ByteOffset {
        let width = c.len_utf8();
        ByteOffset(width as i64)
    }

    /// The offset spanned by the given `str`, i.e. its length in bytes
    pub fn from_str_len(s: &str) -> ByteOffset {
        let width = s.len();
        ByteOffset(width as i64)
    }
}

impl core::ops::Add for ByteOffset {
    type Output = ByteOffset;

    fn add(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs + rhs)
    }
}

impl core::ops::AddAssign for ByteOffset {
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}

impl core::ops::Sub for ByteOffset {
    type Output = ByteOffset;

    fn sub(self, rhs: Self) -> Self {
        let (ByteOffset(lhs), ByteOffset(rhs)) = (self, rhs);
        ByteOffset(lhs - rhs)
    }
}

impl core::ops::SubAssign for ByteOffset {
    fn sub_assign(&mut self, rhs: Self) {
        *self = *self - rhs;
    }
}

impl fmt::Display for ByteOffset {
    /// Formats the offset exactly as the underlying `i64` would be formatted
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <i64 as fmt::Display>::fmt(&self.0, f)
    }
}
953
/// Declares a pair of newtypes describing a position within a source file: a zero-indexed
/// `*Index` type wrapping a `u32`, and a one-indexed `*Number` type wrapping a [NonZeroU32],
/// together with conversions between the two, checked and saturating arithmetic, arithmetic
/// operator implementations, and `Display`.
///
/// The two-argument form derives both type names from `$name`, by appending `Index` and `Number`
/// to it; the three-argument form takes both type names explicitly.
macro_rules! declare_dual_number_and_index_type {
    ($name:ident, $description:literal) => {
        paste::paste! {
            declare_dual_number_and_index_type!([<$name Index>], [<$name Number>], $description);
        }
    };

    ($index_name:ident, $number_name:ident, $description:literal) => {
        #[doc = concat!("A zero-indexed ", $description, " number")]
        #[derive(
            Default,
            Debug,
            Copy,
            Clone,
            PartialEq,
            Eq,
            PartialOrd,
            Ord,
            Hash,
            Serialize,
            Deserialize,
        )]
        #[serde(transparent)]
        pub struct $index_name(pub u32);

        impl $index_name {
            #[doc = concat!("Convert to a [", stringify!($number_name), "]")]
            ///
            /// # Panics
            ///
            /// Panics if this index is `u32::MAX`, as the corresponding one-indexed number is not
            /// representable in a `u32`.
            pub const fn number(self) -> $number_name {
                // `wrapping_add(1)` produces zero only when `self.0 == u32::MAX`, so overflow is
                // detected by the checked `NonZeroU32` constructor instead of being assumed away
                // by an unchecked one.
                match NonZeroU32::new(self.0.wrapping_add(1)) {
                    Some(number) => $number_name(number),
                    None => panic!("invalid index: overflow occurred"),
                }
            }

            /// Get the raw index value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0 as usize
            }

            /// Get the raw index value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0
            }

            /// Add `offset` to this index, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this index, returning `None` on overflow or underflow
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.checked_add_signed(offset).map(Self)
            }

            /// Subtract `offset` from this index, returning `None` on underflow
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.checked_sub(offset).map(Self)
            }

            /// Add `offset` to this index, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                Self(self.0.saturating_add(offset))
            }

            /// Add a signed `offset` to this index, saturating to `0` on underflow, and `u32::MAX`
            /// on overflow.
            pub const fn saturating_add_signed(self, offset: i32) -> Self {
                Self(self.0.saturating_add_signed(offset))
            }

            /// Subtract `offset` from this index, saturating to `0` on underflow
            pub const fn saturating_sub(self, offset: u32) -> Self {
                Self(self.0.saturating_sub(offset))
            }
        }

        impl From<u32> for $index_name {
            #[inline]
            fn from(index: u32) -> Self {
                Self(index)
            }
        }

        impl From<$number_name> for $index_name {
            #[inline]
            fn from(index: $number_name) -> Self {
                // A number is always at least one, so this subtraction cannot underflow
                Self(index.to_u32() - 1)
            }
        }

        impl core::ops::Add<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn add(self, rhs: u32) -> Self {
                Self(self.0 + rhs)
            }
        }

        impl core::ops::AddAssign<u32> for $index_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $index_name {
            type Output = Self;

            /// Panics if the result does not fit in a `u32`
            fn add(self, rhs: i32) -> Self {
                self.checked_add_signed(rhs)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $index_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $index_name {
            type Output = Self;

            #[inline]
            fn sub(self, rhs: u32) -> Self {
                Self(self.0 - rhs)
            }
        }

        impl core::ops::SubAssign<u32> for $index_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $index_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }

        #[doc = concat!("A one-indexed ", $description, " number")]
        #[derive(
            Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
        )]
        #[serde(transparent)]
        pub struct $number_name(NonZeroU32);

        impl Default for $number_name {
            /// The default number is `1`, i.e. the first valid one-indexed value
            fn default() -> Self {
                // SAFETY: the literal `1` is non-zero
                Self(unsafe { NonZeroU32::new_unchecked(1) })
            }
        }

        impl $number_name {
            /// Create a new number from a raw `u32`, returning `None` if `number` is zero
            pub const fn new(number: u32) -> Option<Self> {
                match NonZeroU32::new(number) {
                    Some(num) => Some(Self(num)),
                    None => None,
                }
            }

            #[doc = concat!("Convert to a [", stringify!($index_name), "]")]
            pub const fn to_index(self) -> $index_name {
                $index_name(self.to_u32().saturating_sub(1))
            }

            /// Get the raw value as a usize
            #[inline(always)]
            pub const fn to_usize(self) -> usize {
                self.0.get() as usize
            }

            /// Get the raw value as a u32
            #[inline(always)]
            pub const fn to_u32(self) -> u32 {
                self.0.get()
            }

            /// Add `offset` to this number, returning `None` on overflow
            pub fn checked_add(self, offset: u32) -> Option<Self> {
                self.0.checked_add(offset).map(Self)
            }

            /// Add a signed `offset` to this number, returning `None` on overflow, or if the
            /// result would be less than one
            pub fn checked_add_signed(self, offset: i32) -> Option<Self> {
                self.0.get().checked_add_signed(offset).and_then(Self::new)
            }

            /// Subtract `offset` from this number, returning `None` if the result would be less
            /// than one
            pub fn checked_sub(self, offset: u32) -> Option<Self> {
                self.0.get().checked_sub(offset).and_then(Self::new)
            }

            /// Add `offset` to this number, saturating to `u32::MAX` on overflow
            pub const fn saturating_add(self, offset: u32) -> Self {
                // SAFETY: `self.0` is non-zero, and a saturating addition of an unsigned offset
                // can never produce a value smaller than `self.0`, so the result is non-zero
                Self(unsafe { NonZeroU32::new_unchecked(self.0.get().saturating_add(offset)) })
            }

            /// Add a signed `offset` to this number, saturating to `1` (the smallest valid
            /// number) on underflow, and `u32::MAX` on overflow.
            pub fn saturating_add_signed(self, offset: i32) -> Self {
                Self::new(self.to_u32().saturating_add_signed(offset)).unwrap_or_default()
            }

            /// Subtract `offset` from this number, saturating to `1` (the smallest valid number)
            /// on underflow
            pub fn saturating_sub(self, offset: u32) -> Self {
                Self::new(self.to_u32().saturating_sub(offset)).unwrap_or_default()
            }
        }

        impl From<NonZeroU32> for $number_name {
            #[inline]
            fn from(index: NonZeroU32) -> Self {
                Self(index)
            }
        }

        impl From<$index_name> for $number_name {
            /// Panics if `index` is `u32::MAX`, which has no one-indexed equivalent
            #[inline]
            fn from(index: $index_name) -> Self {
                // Delegate to the checked conversion, rather than constructing the non-zero
                // value unchecked from an addition which may wrap around to zero
                index.number()
            }
        }

        impl core::ops::Add<u32> for $number_name {
            type Output = Self;

            /// Panics if the result does not fit in a `u32`
            #[inline]
            fn add(self, rhs: u32) -> Self {
                // A wrapping addition could produce zero here, so the sum must be checked
                // before it is used as a non-zero value
                self.checked_add(rhs)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<u32> for $number_name {
            fn add_assign(&mut self, rhs: u32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Add<i32> for $number_name {
            type Output = Self;

            /// Panics if the result does not fit in a `u32`, or would be less than one
            fn add(self, rhs: i32) -> Self {
                self.to_u32()
                    .checked_add_signed(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::AddAssign<i32> for $number_name {
            fn add_assign(&mut self, rhs: i32) {
                let result = *self + rhs;
                *self = result;
            }
        }

        impl core::ops::Sub<u32> for $number_name {
            type Output = Self;

            /// Panics if the result would be less than one
            #[inline]
            fn sub(self, rhs: u32) -> Self {
                self.to_u32()
                    .checked_sub(rhs)
                    .and_then(Self::new)
                    .expect("invalid offset: overflow occurred")
            }
        }

        impl core::ops::SubAssign<u32> for $number_name {
            fn sub_assign(&mut self, rhs: u32) {
                let result = *self - rhs;
                *self = result;
            }
        }

        impl fmt::Display for $number_name {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
1241
1242declare_dual_number_and_index_type!(Line, "line");
1243declare_dual_number_and_index_type!(Column, "column");
1244
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the bytes covered by line `$line` of `$content` are those of `$expected`
    macro_rules! assert_line_bytes {
        ($content:expr, $line:expr, $expected:expr) => {
            assert_eq!(
                $content
                    .byte_slice($content.line_range($line).expect("invalid line"))
                    .expect("invalid byte range"),
                $expected.as_bytes()
            )
        };
    }

    /// Line ranges of freshly-created content include the trailing newline of each line, and
    /// the final line (following the last newline) is empty.
    #[test]
    fn source_content_line_starts() {
        const CONTENT: &str = concat!(
            "begin\n",
            "  push.1\n",
            "  push.2\n",
            "  add\n",
            "end\n",
        );
        let content = SourceContent::new("masm", "foo.masm", CONTENT);

        assert_eq!(content.line_count(), 6);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(1), "  push.1\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }

    /// Line ranges remain correct after the content has been edited via `update`.
    #[test]
    fn source_content_line_starts_after_update() {
        const CONTENT: &str = concat!(
            "begin\n",
            "  push.1\n",
            "  push.2\n",
            "  add\n",
            "end\n",
        );
        const FRAGMENT: &str = concat!("  push.2\n", "  mul\n", "end\n");

        let mut content = SourceContent::new("masm", "foo.masm", CONTENT);
        let selection = Selection::from(LineIndex(4)..LineIndex(5));
        content
            .update(FRAGMENT.to_string(), Some(selection), 1)
            .expect("update failed");

        assert_eq!(
            content.as_str(),
            concat!(
                "begin\n",
                "  push.1\n",
                "  push.2\n",
                "  add\n",
                "  push.2\n",
                "  mul\n",
                "end\n",
            )
        );
        assert_eq!(content.line_count(), 8);
        assert_line_bytes!(content, LineIndex(0), "begin\n");
        assert_line_bytes!(content, LineIndex(3), "  add\n");
        assert_line_bytes!(content, LineIndex(4), "  push.2\n");
        assert_line_bytes!(content, content.last_line_index(), "");
    }
}
1349}