use std::{
collections::{HashMap, HashSet},
rc::Rc,
};
use tree_sitter::Node;
use super::language_rules::{Classified, MetadataBinding, USE_POISON_KEY, rules_for};
pub(super) use super::language_rules::{ItemKind, UseIdentity};
use crate::parser::{Language, ParsedFile};
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ItemKey {
pub kind: ItemKind,
pub name: String,
pub scope: Vec<String>,
pub signature_hash: u64,
}
#[derive(Clone, Debug)]
pub(crate) struct Item {
pub key: ItemKey,
pub start_byte: usize,
pub end_byte: usize,
pub use_identity: Option<UseIdentity>,
pub body: Option<ContainerBody>,
}
#[derive(Clone, Debug)]
pub(crate) struct ContainerBody {
pub inner_start: usize,
pub inner_end: usize,
pub content_start: usize,
pub content_end: usize,
pub items: Vec<Item>,
}
#[derive(Clone, Debug)]
pub(crate) struct FileSegments {
pub items: Vec<Item>,
pub source_len: usize,
}
pub(crate) fn inter_ranges(
items: &[Item],
region_start: usize,
region_end: usize,
) -> Vec<(usize, usize)> {
let mut out = Vec::with_capacity(items.len() + 1);
let mut cursor = region_start;
for item in items {
out.push((cursor, item.start_byte));
cursor = item.end_byte;
}
out.push((cursor, region_end));
out
}
fn body_content_bounds(body: Node<'_>) -> (usize, usize) {
let inner_start = body.start_byte();
let inner_end = body.end_byte();
let mut content_start = inner_start;
let mut content_end = inner_end;
let count = body.child_count();
if count > 0 {
let first = body.child(0).unwrap();
if !first.is_named() {
content_start = first.end_byte();
}
let last = body.child(count as u32 - 1).unwrap();
if !last.is_named() {
content_end = last.start_byte();
}
}
content_start = content_start.clamp(inner_start, inner_end);
content_end = content_end.clamp(inner_start, inner_end);
if content_start > content_end {
content_end = content_start;
}
(content_start, content_end)
}
pub(crate) fn extract_items(parsed: &ParsedFile) -> Vec<Item> {
let raws = collect_raw_items(parsed.language, &parsed.source, parsed.root_node());
assemble_tree(raws)
}
pub(crate) fn segment_file(parsed: &ParsedFile) -> FileSegments {
FileSegments {
items: extract_items(parsed),
source_len: parsed.source.len(),
}
}
const MAX_TRAVERSAL_DEPTH: usize = 256;
const CONTAINER_DEPTH_LIMIT: usize = 8;
struct RawItem {
key: ItemKey,
start_byte: usize,
end_byte: usize,
use_identity: Option<UseIdentity>,
container: Option<(usize, usize, usize, usize)>,
}
fn collect_raw_items(language: Language, source: &str, root: Node<'_>) -> Vec<RawItem> {
let mut out: Vec<RawItem> = Vec::new();
let empty: Rc<Vec<String>> = Rc::new(Vec::new());
let mut stack: Vec<(Node<'_>, Rc<Vec<String>>, usize, usize)> =
vec![(root, Rc::clone(&empty), 0, 0)];
while let Some((node, scope, depth, cdepth)) = stack.pop() {
if depth > MAX_TRAVERSAL_DEPTH {
continue;
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
if let Some(classified) = classify_node(language, source, child) {
let Classified {
kind,
name,
container_body,
signature_hash,
extra_scope,
} = classified;
let start_byte = leading_metadata_start(language, source, child);
if let Some(body) = container_body {
let recurse = cdepth < CONTAINER_DEPTH_LIMIT;
let item_key = ItemKey {
kind,
name: name.clone(),
scope: (*scope).clone(),
signature_hash,
};
out.push(RawItem {
key: item_key,
start_byte,
end_byte: child.end_byte(),
use_identity: None,
container: recurse.then(|| {
let (content_start, content_end) = body_content_bounds(body);
(
body.start_byte(),
body.end_byte(),
content_start,
content_end,
)
}),
});
if recurse {
let mut next_scope = (*scope).clone();
next_scope.push(name);
stack.push((body, Rc::new(next_scope), depth + 1, cdepth + 1));
}
} else {
let mut item_scope = (*scope).clone();
item_scope.extend(extra_scope);
let use_identity = if matches!(kind, ItemKind::Use) {
super::language_rules::use_identity(language, source, child)
} else {
None
};
let item_key = ItemKey {
kind,
name,
scope: item_scope,
signature_hash,
};
out.push(RawItem {
key: item_key,
start_byte,
end_byte: child.end_byte(),
use_identity,
container: None,
});
}
} else {
stack.push((child, Rc::clone(&scope), depth + 1, cdepth));
}
}
}
out
}
fn assemble_tree(mut raws: Vec<RawItem>) -> Vec<Item> {
raws.sort_by_key(|r| r.start_byte);
let mut top: Vec<Item> = Vec::new();
let mut open: Vec<(Item, Vec<Item>, usize)> = Vec::new();
fn attach(item: Item, open: &mut [(Item, Vec<Item>, usize)], top: &mut Vec<Item>) {
match open.last_mut() {
Some((_, children, _)) => children.push(item),
None => top.push(item),
}
}
fn close_one(open: &mut Vec<(Item, Vec<Item>, usize)>, top: &mut Vec<Item>) {
let (mut container, children, inner_end) = open.pop().unwrap();
if let Some(body) = container.body.as_mut() {
debug_assert_eq!(body.inner_end, inner_end);
body.items = children;
}
attach(container, open, top);
}
for raw in raws {
while open
.last()
.is_some_and(|(_, _, end)| raw.start_byte >= *end)
{
close_one(&mut open, &mut top);
}
match raw.container {
Some((inner_start, inner_end, content_start, content_end)) => {
let item = Item {
key: raw.key,
start_byte: raw.start_byte,
end_byte: raw.end_byte,
use_identity: raw.use_identity,
body: Some(ContainerBody {
inner_start,
inner_end,
content_start,
content_end,
items: Vec::new(),
}),
};
open.push((item, Vec::new(), inner_end));
}
None => {
let item = Item {
key: raw.key,
start_byte: raw.start_byte,
end_byte: raw.end_byte,
use_identity: raw.use_identity,
body: None,
};
attach(item, &mut open, &mut top);
}
}
}
while !open.is_empty() {
close_one(&mut open, &mut top);
}
top
}
pub(crate) fn canonicalize_use_keys(
base: &mut FileSegments,
ours: &mut FileSegments,
theirs: &mut FileSegments,
) {
let mut poisoned_scopes: HashSet<Vec<String>> = HashSet::new();
for seg in [&*base, &*ours, &*theirs] {
visit_items(&seg.items, &mut |item| {
if matches!(item.use_identity, Some(UseIdentity::Unanalyzable)) {
poisoned_scopes.insert(item.key.scope.clone());
}
});
}
if !poisoned_scopes.is_empty() {
for seg in [&mut *base, &mut *ours, &mut *theirs] {
visit_items_mut(&mut seg.items, &mut |item| {
if item.use_identity.is_some() && poisoned_scopes.contains(&item.key.scope) {
item.key.name = USE_POISON_KEY.to_string();
}
});
}
}
let mut uf = LeafUnionFind::default();
for seg in [&*base, &*ours, &*theirs] {
visit_items(&seg.items, &mut |item| {
if poisoned_scopes.contains(&item.key.scope) {
return;
}
let Some(UseIdentity::Plain(leaves)) = &item.use_identity else {
return;
};
let mut leaf_iter = leaves.iter();
let Some(first) = leaf_iter.next() else {
return;
};
let anchor = uf.intern(first);
for leaf in leaf_iter {
let node = uf.intern(leaf);
uf.union(anchor, node);
}
});
}
let canonical = uf.component_min_label();
for seg in [base, ours, theirs] {
visit_items_mut(&mut seg.items, &mut |item| {
if poisoned_scopes.contains(&item.key.scope) {
return;
}
let Some(UseIdentity::Plain(leaves)) = &item.use_identity else {
return;
};
if let Some(first) = leaves.first()
&& let Some(name) = canonical.get(first)
{
item.key.name = name.clone();
}
});
}
}
pub(crate) fn visit_items<'a>(items: &'a [Item], f: &mut impl FnMut(&'a Item)) {
for item in items {
f(item);
if let Some(body) = &item.body {
visit_items(&body.items, f);
}
}
}
fn visit_items_mut(items: &mut [Item], f: &mut impl FnMut(&mut Item)) {
for item in items {
f(item);
if let Some(body) = &mut item.body {
visit_items_mut(&mut body.items, f);
}
}
}
#[derive(Default)]
struct LeafUnionFind {
index: HashMap<String, usize>,
labels: Vec<String>,
parent: Vec<usize>,
}
impl LeafUnionFind {
fn intern(&mut self, leaf: &str) -> usize {
if let Some(&i) = self.index.get(leaf) {
return i;
}
let i = self.labels.len();
self.index.insert(leaf.to_string(), i);
self.labels.push(leaf.to_string());
self.parent.push(i);
i
}
fn find(&mut self, mut x: usize) -> usize {
while self.parent[x] != x {
self.parent[x] = self.parent[self.parent[x]];
x = self.parent[x];
}
x
}
fn union(&mut self, a: usize, b: usize) {
let ra = self.find(a);
let rb = self.find(b);
if ra != rb {
self.parent[ra] = rb;
}
}
fn component_min_label(&mut self) -> HashMap<String, String> {
let mut root_min: HashMap<usize, String> = HashMap::new();
for i in 0..self.labels.len() {
let root = self.find(i);
let label = self.labels[i].clone();
root_min
.entry(root)
.and_modify(|m| {
if label < *m {
*m = label.clone();
}
})
.or_insert(label);
}
let mut out = HashMap::with_capacity(self.labels.len());
for i in 0..self.labels.len() {
let root = self.find(i);
out.insert(self.labels[i].clone(), root_min[&root].clone());
}
out
}
}
fn classify_node<'a>(
language: Language,
source: &'a str,
node: Node<'a>,
) -> Option<Classified<'a>> {
rules_for(language)?.classify_node(language, source, node)
}
fn leading_metadata_start(language: Language, source: &str, node: Node<'_>) -> usize {
let mut earliest = node.start_byte();
let mut current = node;
while let Some(prev) = current.prev_sibling() {
if !is_leading_metadata_for(language, prev, source, current.start_byte()) {
break;
}
earliest = prev.start_byte();
current = prev;
}
earliest
}
fn is_leading_metadata_for(
language: Language,
prev: Node<'_>,
source: &str,
next_start: usize,
) -> bool {
let Some(rules) = rules_for(language) else {
return false;
};
let kind = prev.kind();
rules.leading_metadata_kinds().iter().any(|rule| {
rule.kind == kind
&& match rule.binding {
MetadataBinding::Always => true,
MetadataBinding::NoBlankLine => {
!has_blank_line_between(source, prev.end_byte(), next_start)
}
MetadataBinding::RustOuterComment => {
!is_rust_inner_doc_comment(source, prev)
&& !has_blank_line_between(source, prev.end_byte(), next_start)
}
}
})
}
fn is_rust_inner_doc_comment(source: &str, node: Node<'_>) -> bool {
let bytes = source.as_bytes();
let start = node.start_byte();
if start + 3 > source.len() {
return false;
}
let head = &bytes[start..start + 3];
head == b"//!" || head == b"/*!"
}
fn has_blank_line_between(source: &str, start: usize, end: usize) -> bool {
if start >= end {
return false;
}
source.as_bytes()[start..end]
.iter()
.filter(|&&b| b == b'\n')
.count()
>= 2
}