use std::sync::Arc;
use shuck_ast::{Comment, Position, Span};
/// Borrowed view of a source text paired with precomputed offset tables.
///
/// The tables live behind an [`Arc`], so cloning a `SourceMap` shares them
/// instead of recomputing or copying them.
#[derive(Debug, Clone)]
pub struct SourceMap<'a> {
/// The source text every offset refers to.
source: &'a str,
/// Shared, immutable lookup tables built once in `SourceMap::new`.
data: Arc<SourceMapData>,
}
/// Offset tables computed from the source text by `SourceMap::new`.
///
/// Every offset is a byte offset into the source, stored in ascending order.
#[derive(Debug)]
struct SourceMapData {
/// Byte offset at which each line begins; the first entry is always `0`.
line_starts: Vec<usize>,
/// For each line, the offset of its first non-whitespace character, or `None`
/// when the line contains only whitespace.
first_non_whitespace: Vec<Option<usize>>,
/// Offsets of every `#` byte.
hash_offsets: Vec<usize>,
/// Offsets of every tab byte.
tab_offsets: Vec<usize>,
/// Offsets of every space byte that is immediately followed by another space.
double_space_offsets: Vec<usize>,
}
impl<'a> SourceMap<'a> {
    /// Builds the offset tables for `source` in a single pass over its lines and bytes.
    #[must_use]
    pub fn new(source: &'a str) -> Self {
        let line_starts = line_starts(source);
        let first_non_whitespace = line_starts
            .iter()
            .enumerate()
            .map(|(index, &start)| {
                // A line runs up to the next line start, or to the end of the source.
                let end = line_starts.get(index + 1).copied().unwrap_or(source.len());
                source[start..end]
                    .char_indices()
                    // `char::is_whitespace` already treats '\n' as whitespace.
                    .find(|(_, ch)| !ch.is_whitespace())
                    .map(|(offset, _)| start + offset)
            })
            .collect();

        let bytes = source.as_bytes();
        let mut hash_offsets = Vec::new();
        let mut tab_offsets = Vec::new();
        let mut double_space_offsets = Vec::new();
        for (offset, &byte) in bytes.iter().enumerate() {
            match byte {
                b'#' => hash_offsets.push(offset),
                b'\t' => tab_offsets.push(offset),
                // Record a space only when the byte right after it is a space too.
                b' ' if bytes.get(offset + 1) == Some(&b' ') => {
                    double_space_offsets.push(offset);
                }
                _ => {}
            }
        }

        Self {
            source,
            data: Arc::new(SourceMapData {
                line_starts,
                first_non_whitespace,
                hash_offsets,
                tab_offsets,
                double_space_offsets,
            }),
        }
    }

    /// Returns the source text this map was built from.
    #[must_use]
    pub fn source(&self) -> &'a str {
        self.source
    }

    /// Returns the 1-based number of the line containing `offset`.
    ///
    /// Offsets past the end of the source are clamped to the last byte.
    #[must_use]
    pub fn line_number_for_offset(&self, offset: usize) -> usize {
        self.line_index_for_offset(offset) + 1
    }

    /// Builds a [`Span`] covering the byte range `start..end`.
    ///
    /// The start position's column is the 1-based count of characters between the
    /// start of the line and `start`. If `start..end` is not a valid slice of the
    /// source, the span is empty and ends at its start position. This method does
    /// not panic for out-of-range or non-boundary offsets.
    #[must_use]
    pub fn span_for_offsets(&self, start: usize, end: usize) -> Span {
        let line_index = self.line_index_for_offset(start);
        let line_start = self.data.line_starts[line_index];
        let text = self.source.get(start..end).unwrap_or("");
        // Count the characters that begin before `start`. Unlike slicing with
        // `[line_start..start]`, this cannot panic when `start` lies past the end of
        // the source or inside a multi-byte character; for valid offsets the result
        // is identical.
        let chars_before = self.source[line_start..]
            .char_indices()
            .take_while(|(relative, _)| line_start + relative < start)
            .count();
        let start_position = Position {
            line: line_index + 1,
            column: chars_before + 1,
            offset: start,
        };
        let end_position = start_position.advanced_by(text);
        Span::from_positions(start_position, end_position)
    }

    /// Returns `true` when the line containing `offset` has a non-whitespace
    /// character strictly before `offset`.
    #[must_use]
    pub fn is_inline_comment(&self, offset: usize) -> bool {
        self.data.first_non_whitespace[self.line_index_for_offset(offset)]
            .is_some_and(|first| first < offset)
    }

    /// Resolves an AST comment into a [`SourceComment`].
    ///
    /// Returns `None` when the comment's range is empty or extends past the end of
    /// the source.
    #[must_use]
    pub(crate) fn source_comment(&self, comment: Comment) -> Option<SourceComment<'a>> {
        let start = usize::from(comment.range.start());
        let end = usize::from(comment.range.end());
        (start < end && end <= self.source.len()).then(|| SourceComment {
            text: &self.source[start..end],
            span: self.span_for_offsets(start, end),
            line: self.line_number_for_offset(start),
            inline: self.is_inline_comment(start),
        })
    }

    /// Returns `true` when a `#` byte occurs in `start..end`.
    ///
    /// This is a byte-level check: any `#` counts, whether or not it actually
    /// starts a comment.
    #[must_use]
    pub fn contains_comment_between(&self, start: usize, end: usize) -> bool {
        contains_offset_in_range(&self.data.hash_offsets, start, end)
    }

    /// Returns `true` when some line begins strictly after `start` and strictly
    /// before `end`.
    ///
    /// NOTE(review): a newline in the last byte of the range (the following line
    /// starts exactly at `end`) is not reported, and neither is a newline that is
    /// the final byte of the source, because no line start is recorded after it.
    /// Confirm callers expect this.
    #[must_use]
    pub fn contains_newline_between(&self, start: usize, end: usize) -> bool {
        if start >= end {
            return false;
        }
        // First recorded line start that is greater than `start`.
        let index = self
            .data
            .line_starts
            .partition_point(|offset| *offset <= start);
        self.data
            .line_starts
            .get(index)
            .is_some_and(|offset| *offset < end)
    }

    /// Returns `true` when `start..end` stays on one line (per
    /// [`Self::contains_newline_between`]) and contains either a tab or two
    /// adjacent spaces.
    #[must_use]
    pub fn has_alignment_padding_between(&self, start: usize, end: usize) -> bool {
        if start >= end || self.contains_newline_between(start, end) {
            return false;
        }
        contains_offset_in_range(&self.data.tab_offsets, start, end)
            // A recorded double space occupies two bytes, so its first byte must
            // sit before `end - 1` for both spaces to fall inside the range.
            || (end.saturating_sub(start) >= 2
                && contains_offset_in_range(
                    &self.data.double_space_offsets,
                    start,
                    end.saturating_sub(1),
                ))
    }

    /// Returns the 0-based index of the line containing `offset`, clamping offsets
    /// past the end of the source to its last byte.
    fn line_index_for_offset(&self, offset: usize) -> usize {
        let offset = offset.min(self.source.len().saturating_sub(1));
        match self.data.line_starts.binary_search(&offset) {
            Ok(index) => index,
            // `line_starts[0]` is 0, so the insertion point is at least 1 here and
            // the previous entry is the start of the containing line.
            Err(index) => index.saturating_sub(1),
        }
    }
}
/// A comment resolved against the source text by `SourceMap::source_comment`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceComment<'a> {
/// The slice of the source covered by the comment's range.
text: &'a str,
/// Span built from the comment's start and end offsets.
span: Span,
/// 1-based line number of the comment's start offset.
line: usize,
/// Whether a non-whitespace character precedes the comment on its line.
inline: bool,
}
impl<'a> SourceComment<'a> {
/// Returns the comment's text as it appears in the source.
#[must_use]
pub fn text(&self) -> &'a str {
self.text
}
/// Returns the span the comment occupies in the source.
#[must_use]
pub fn span(&self) -> Span {
self.span
}
/// Returns the 1-based line number on which the comment starts.
#[must_use]
pub fn line(&self) -> usize {
self.line
}
/// Returns `true` when non-whitespace text precedes the comment on its line.
#[must_use]
pub fn inline(&self) -> bool {
self.inline
}
}
/// Comments attached to a sequence of child spans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SequenceCommentAttachment<'a> {
/// Comments attached before each child, indexed by child position.
leading: Vec<Vec<SourceComment<'a>>>,
/// Comments attached after each child, indexed by child position.
trailing: Vec<Vec<SourceComment<'a>>>,
/// Comments that start at or after the end of the last child.
dangling: Vec<SourceComment<'a>>,
/// Set when an inline comment could not be attached to any child.
ambiguous: bool,
}
impl<'a> SequenceCommentAttachment<'a> {
    /// Creates an attachment with one empty leading and one empty trailing bucket
    /// per child, no dangling comments, and the ambiguity flag cleared.
    fn new(child_count: usize) -> Self {
        let empty_buckets = || -> Vec<Vec<SourceComment<'a>>> {
            (0..child_count).map(|_| Vec::new()).collect()
        };
        Self {
            leading: empty_buckets(),
            trailing: empty_buckets(),
            dangling: Vec::new(),
            ambiguous: false,
        }
    }

    /// Returns the leading comments of the child at `index`, or an empty slice
    /// when `index` is out of range.
    #[must_use]
    pub fn leading_for(&self, index: usize) -> &[SourceComment<'a>] {
        match self.leading.get(index) {
            Some(bucket) => bucket.as_slice(),
            None => &[],
        }
    }

    /// Returns the trailing comments of the child at `index`, or an empty slice
    /// when `index` is out of range.
    #[must_use]
    pub fn trailing_for(&self, index: usize) -> &[SourceComment<'a>] {
        match self.trailing.get(index) {
            Some(bucket) => bucket.as_slice(),
            None => &[],
        }
    }

    /// Returns the comments that follow the last child.
    #[must_use]
    pub fn dangling(&self) -> &[SourceComment<'a>] {
        self.dangling.as_slice()
    }

    /// Returns `true` when an inline comment could not be attached to any child.
    #[must_use]
    pub fn is_ambiguous(&self) -> bool {
        self.ambiguous
    }

    /// Returns `true` when the attachment is ambiguous or holds at least one
    /// leading, trailing, or dangling comment.
    #[must_use]
    pub fn has_comments(&self) -> bool {
        if self.ambiguous || !self.dangling.is_empty() {
            return true;
        }
        self.leading
            .iter()
            .chain(self.trailing.iter())
            .any(|bucket| !bucket.is_empty())
    }

    /// Consumes the attachment and returns `(leading, trailing, dangling, ambiguous)`.
    #[allow(clippy::type_complexity)]
    pub(crate) fn into_parts(
        self,
    ) -> (
        Vec<Vec<SourceComment<'a>>>,
        Vec<Vec<SourceComment<'a>>>,
        Vec<SourceComment<'a>>,
        bool,
    ) {
        let Self {
            leading,
            trailing,
            dangling,
            ambiguous,
        } = self;
        (leading, trailing, dangling, ambiguous)
    }
}
/// Result of inspecting a sequence: the computed attachment plus the indices of
/// the comments that were attached.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SequenceCommentAnalysis<'a> {
/// Where each comment was attached.
pub(crate) attachment: SequenceCommentAttachment<'a>,
/// Indices of the attached comments, as consumed by
/// `CommentAttachmentIndex::claim_sequence`.
pub(crate) claimed_indices: Vec<usize>,
}
/// All comments of a source file, with bookkeeping for which ones have already
/// been claimed.
#[derive(Debug, Clone)]
pub struct CommentAttachmentIndex<'a> {
/// Offset tables for the source the comments belong to.
source_map: SourceMap<'a>,
/// The resolved comments, sorted by start offset in `from_ast`.
items: Arc<[SourceComment<'a>]>,
/// One flag per entry of `items`; `true` once that comment has been claimed.
claimed: Vec<bool>,
/// Index from which scans start; every comment before it is already claimed.
next_unclaimed: usize,
}
/// Shorter alias for [`CommentAttachmentIndex`].
pub type Comments<'a> = CommentAttachmentIndex<'a>;
impl<'a> CommentAttachmentIndex<'a> {
    /// Builds the index from the AST's comments.
    ///
    /// Comments that `SourceMap::source_comment` rejects (empty or out-of-range
    /// ranges) are dropped; the rest are sorted by start offset and start out
    /// unclaimed.
    #[must_use]
    pub fn from_ast(source: &'a str, comments: &[Comment]) -> Self {
        let source_map = SourceMap::new(source);
        let mut items = comments
            .iter()
            .filter_map(|comment| source_map.source_comment(*comment))
            .collect::<Vec<_>>();
        items.sort_by_key(|comment| comment.span.start.offset);
        let claimed = vec![false; items.len()];
        Self {
            source_map,
            items: Arc::from(items.into_boxed_slice()),
            claimed,
            next_unclaimed: 0,
        }
    }

    /// Returns the source map the comments were resolved against.
    #[must_use]
    pub fn source_map(&self) -> &SourceMap<'a> {
        &self.source_map
    }

    /// Returns the total number of comments, claimed or not.
    #[must_use]
    pub fn len(&self) -> usize {
        self.items.len()
    }

    /// Returns `true` when the index holds no comments at all.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.items.is_empty()
    }

    /// Computes how the unclaimed comments attach to `child_spans` without
    /// claiming anything.
    ///
    /// `upper_bound`, when given, is the offset at which the scan stops.
    #[must_use]
    pub(crate) fn inspect_sequence(
        &self,
        child_spans: &[Span],
        upper_bound: Option<usize>,
    ) -> SequenceCommentAnalysis<'a> {
        compute_sequence_attachment(
            &self.items,
            Some(&self.claimed),
            0,
            self.next_unclaimed,
            child_spans,
            upper_bound,
            None,
            true,
        )
    }

    /// Marks every comment recorded in `analysis.claimed_indices` as claimed.
    ///
    /// # Panics
    ///
    /// Panics if an index in `analysis` is out of range for this index, which
    /// happens only when the analysis was produced from a different comment list.
    pub(crate) fn claim_sequence(&mut self, analysis: &SequenceCommentAnalysis<'a>) {
        for &index in &analysis.claimed_indices {
            self.claimed[index] = true;
        }
        self.advance_next_unclaimed();
    }

    /// Inspects `child_spans`, claims the comments that were attached, and returns
    /// the attachment.
    pub fn attach_sequence(
        &mut self,
        child_spans: &[Span],
        upper_bound: Option<usize>,
    ) -> SequenceCommentAttachment<'a> {
        let analysis = self.inspect_sequence(child_spans, upper_bound);
        self.claim_sequence(&analysis);
        analysis.attachment
    }

    /// Claims and returns every comment that has not been claimed yet, in source
    /// order.
    pub fn take_remaining(&mut self) -> Vec<SourceComment<'a>> {
        let mut remaining = Vec::new();
        for index in self.next_unclaimed..self.items.len() {
            // Set the flag unconditionally; the previous value tells us whether
            // this comment was still available.
            let was_claimed = std::mem::replace(&mut self.claimed[index], true);
            if !was_claimed {
                remaining.push(self.items[index]);
            }
        }
        self.advance_next_unclaimed();
        remaining
    }

    /// Claims every unclaimed comment that lies entirely within `span`.
    pub fn claim_in_span(&mut self, span: Span) {
        for index in self.next_unclaimed..self.items.len() {
            let comment = self.items[index];
            // Comments are sorted by start offset, so nothing later can fit.
            if comment.span.start.offset > span.end.offset {
                break;
            }
            if self.claimed[index] {
                continue;
            }
            if span.start.offset <= comment.span.start.offset
                && comment.span.end.offset <= span.end.offset
            {
                self.claimed[index] = true;
            }
        }
        self.advance_next_unclaimed();
    }

    /// Claims every unclaimed comment whose line number is within
    /// `start_line..=end_line`.
    pub fn claim_lines(&mut self, start_line: usize, end_line: usize) {
        for index in self.next_unclaimed..self.items.len() {
            let comment = self.items[index];
            // The scan ends at the first comment past `end_line`.
            if comment.line > end_line {
                break;
            }
            if self.claimed[index] {
                continue;
            }
            if (start_line..=end_line).contains(&comment.line) {
                self.claimed[index] = true;
            }
        }
        self.advance_next_unclaimed();
    }

    /// Moves `next_unclaimed` forward past the leading run of claimed comments so
    /// later scans can skip them.
    fn advance_next_unclaimed(&mut self) {
        while self.next_unclaimed < self.claimed.len() && self.claimed[self.next_unclaimed] {
            self.next_unclaimed += 1;
        }
    }
}
/// Attaches the comments in `items` to a sequence of `child_spans`.
///
/// Scanning starts at `start_index`. Each unclaimed comment is classified as
/// leading (before a child), trailing (inline, on the line where the previous
/// child ends), or dangling (after the last child); an inline comment that fits
/// none of these sets the `ambiguous` flag instead.
///
/// * `claimed` - optional per-comment flags, looked up at `base_index + index`;
///   flagged comments are skipped.
/// * `base_index` - added to local indices for the `claimed` lookup and for the
///   recorded `claimed_indices`.
/// * `upper_bound` - a comment starting at or after it ends the scan; a comment
///   that merely ends past it is skipped.
/// * `skip_span` - comments entirely inside this span are skipped.
/// * `track_claimed_indices` - whether attached comments are recorded in the
///   returned `claimed_indices`.
///
/// NOTE(review): `child_cursor` only ever moves forward, so this presumably
/// relies on `items` and `child_spans` being ordered by offset — confirm for
/// callers that do not go through `CommentAttachmentIndex::from_ast`.
// The long parameter list is deliberate: both callers in this file pass every
// argument explicitly.
#[allow(clippy::too_many_arguments)]
fn compute_sequence_attachment<'a>(
items: &[SourceComment<'a>],
claimed: Option<&[bool]>,
base_index: usize,
start_index: usize,
child_spans: &[Span],
upper_bound: Option<usize>,
skip_span: Option<Span>,
track_claimed_indices: bool,
) -> SequenceCommentAnalysis<'a> {
let mut attachment = SequenceCommentAttachment::new(child_spans.len());
// Nothing to attach to, or nothing left to scan.
if child_spans.is_empty() || start_index >= items.len() {
return SequenceCommentAnalysis {
attachment,
claimed_indices: Vec::new(),
};
}
let mut claimed_indices = Vec::new();
let first_child_start = child_spans[0].start.offset;
let last_child_end = child_spans
.last()
.map(|span| span.end.offset)
.unwrap_or(first_child_start);
// Without an upper bound the scan covers every remaining comment.
let limit_end = upper_bound.unwrap_or(usize::MAX);
// Index of the first child that does not end at or before the current comment.
let mut child_cursor = 0;
let mut index = start_index;
while index < items.len() {
if comment_is_claimed(claimed, base_index, index) {
index += 1;
continue;
}
let comment = items[index];
let start = comment.span.start.offset;
let end = comment.span.end.offset;
// A comment starting at or past the limit ends the scan.
if start >= limit_end {
break;
}
// A comment that straddles the limit is skipped, but the scan continues.
if end > limit_end {
index += 1;
continue;
}
if skip_span.is_some_and(|span| span_contains_comment(span, comment)) {
index += 1;
continue;
}
// Advance past every child that ends at or before this comment's start.
while child_cursor < child_spans.len() && child_spans[child_cursor].end.offset <= start {
child_cursor += 1;
}
// `prev` is the last child ending at or before the comment, if any.
let prev = child_cursor.checked_sub(1);
// `next` is the child at the cursor, but only when it starts at or after the
// comment's end (i.e. the comment does not overlap it).
let next = child_spans
.get(child_cursor)
.and_then(|span| (span.start.offset >= end).then_some(child_cursor));
let current = child_spans.get(child_cursor);
let inside_current =
current.is_some_and(|span| span.start.offset <= start && end <= span.end.offset);
// A comment nested inside a child is neither attached nor claimed here.
if inside_current {
index += 1;
continue;
}
if comment.inline {
// Inline comment on the line where the previous child ends: trailing comment
// of that child.
if let Some(prev_idx) = prev
&& child_spans[prev_idx].end.line == comment.line
&& child_spans[prev_idx].start.offset <= start
{
attachment.trailing[prev_idx].push(comment);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index,
);
index += 1;
continue;
}
// NOTE(review): both guarded arms below repeat the same-line check from the
// `if let` above, so they can only match when that block's
// `start.offset <= start` condition failed. Every other inline comment is
// marked ambiguous.
match (prev, next) {
(Some(prev_idx), Some(next_idx))
if prev_idx + 1 == next_idx
&& child_spans[prev_idx].end.line == comment.line =>
{
attachment.trailing[prev_idx].push(comment);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index,
);
}
(Some(prev_idx), None) if child_spans[prev_idx].end.line == comment.line => {
attachment.trailing[prev_idx].push(comment);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index,
);
}
_ => attachment.ambiguous = true,
}
index += 1;
continue;
}
// Own-line comments are attached as whole runs of consecutive comments.
if end <= first_child_start {
// Run ending before the first child: leading comments of child 0.
let run_end = advance_comment_run(
items,
claimed,
base_index,
index,
limit_end,
skip_span,
|candidate| candidate.span.end.offset <= first_child_start,
);
for (i, item) in items[index..run_end].iter().enumerate() {
attachment.leading[0].push(*item);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index + i,
);
}
index = run_end;
} else if let Some(next_idx) = next {
// Run inside the gap between the previous child (or offset 0) and the next
// child: leading comments of the next child.
let gap_start = prev
.map(|prev_idx| child_spans[prev_idx].end.offset)
.unwrap_or(0);
let gap_end = child_spans[next_idx].start.offset;
let run_end = advance_comment_run(
items,
claimed,
base_index,
index,
limit_end,
skip_span,
|candidate| {
candidate.span.start.offset >= gap_start && candidate.span.end.offset <= gap_end
},
);
for (i, item) in items[index..run_end].iter().enumerate() {
attachment.leading[next_idx].push(*item);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index + i,
);
}
index = run_end;
} else if start >= last_child_end {
// Run starting at or after the end of the last child: dangling comments.
let run_end = advance_comment_run(
items,
claimed,
base_index,
index,
limit_end,
skip_span,
|candidate| candidate.span.start.offset >= last_child_end,
);
for (i, item) in items[index..run_end].iter().enumerate() {
attachment.dangling.push(*item);
record_claimed_index(
&mut claimed_indices,
track_claimed_indices,
base_index + index + i,
);
}
index = run_end;
} else {
// The comment fits none of the cases above (e.g. it overlaps a child
// boundary); leave it unattached and unclaimed.
index += 1;
}
}
SequenceCommentAnalysis {
attachment,
claimed_indices,
}
}
pub(crate) fn inspect_sequence_comments_in_window<'a>(
items: &[SourceComment<'a>],
child_spans: &[Span],
upper_bound: Option<usize>,
skip_span: Option<Span>,
) -> SequenceCommentAttachment<'a> {
compute_sequence_attachment(
items,
None,
0,
0,
child_spans,
upper_bound,
skip_span,
false,
)
.attachment
}
/// Returns the index just past the run of consecutive comments, beginning at
/// `start_index`, that can be attached together.
///
/// The run ends at the first comment that is already claimed, starts at or after
/// `limit_end`, is inline, ends past `limit_end`, lies inside `skip_span`, or is
/// rejected by `belongs`. When no comment ends the run, the result is
/// `items.len()` (or `start_index` itself if that is already past the end).
fn advance_comment_run<'a>(
    items: &[SourceComment<'a>],
    claimed: Option<&[bool]>,
    base_index: usize,
    start_index: usize,
    limit_end: usize,
    skip_span: Option<Span>,
    belongs: impl Fn(SourceComment<'a>) -> bool,
) -> usize {
    // The checks run in the same order as listed above and short-circuit.
    let ends_run = |position: usize, candidate: SourceComment<'a>| {
        comment_is_claimed(claimed, base_index, position)
            || candidate.span.start.offset >= limit_end
            || candidate.inline
            || candidate.span.end.offset > limit_end
            || skip_span.is_some_and(|span| span_contains_comment(span, candidate))
            || !belongs(candidate)
    };
    items
        .iter()
        .enumerate()
        .skip(start_index)
        .find(|&(position, &candidate)| ends_run(position, candidate))
        .map_or_else(
            || start_index.max(items.len()),
            |(position, _)| position,
        )
}
/// Appends `index` to `target` when `track` is `true`; otherwise does nothing.
fn record_claimed_index(target: &mut Vec<usize>, track: bool, index: usize) {
    target.extend(track.then_some(index));
}
/// Returns `true` when claim flags are provided and the flag at
/// `base_index + index` is set; a missing flag slice or an out-of-range position
/// counts as unclaimed.
fn comment_is_claimed(claimed: Option<&[bool]>, base_index: usize, index: usize) -> bool {
    let Some(flags) = claimed else {
        return false;
    };
    matches!(flags.get(base_index + index), Some(&true))
}
/// Returns `true` when the comment's span lies entirely within `span`, with both
/// boundaries inclusive.
fn span_contains_comment(span: Span, comment: SourceComment<'_>) -> bool {
    let inner = comment.span;
    let starts_inside = span.start.offset <= inner.start.offset;
    let ends_inside = inner.end.offset <= span.end.offset;
    starts_inside && ends_inside
}
/// Returns the byte offset at which each line of `source` begins.
///
/// The first entry is always `0`. Each `\n` contributes the offset right after
/// it, except a `\n` that is the final byte of the source, which does not open a
/// new line.
fn line_starts(source: &str) -> Vec<usize> {
    let after_newlines = source
        .match_indices('\n')
        .map(|(newline_at, _)| newline_at + 1)
        .filter(|&next_line| next_line < source.len());
    std::iter::once(0).chain(after_newlines).collect()
}
/// Returns `true` when the ascending list `offsets` has an entry in the
/// half-open range `start..end`. An empty or inverted range never matches.
fn contains_offset_in_range(offsets: &[usize], start: usize, end: usize) -> bool {
    start < end && {
        // Position of the first entry that is not below `start`.
        let first_candidate = offsets.partition_point(|&offset| offset < start);
        matches!(offsets.get(first_candidate), Some(&offset) if offset < end)
    }
}