1#![allow(clippy::mutable_key_type)] use std::{cell::Cell, fmt, hash::Hash, num::NonZeroU32};
6
7use line_numbers::SingleLineSpan;
8use typed_arena::Arena;
9
10use self::Syntax::*;
11use super::{changes::ChangeKind, changes::ChangeKind::*, hash::DftHashMap};
12
13fn split_on_newlines(s: &str) -> impl Iterator<Item = &str> {
15 s.split('\n').map(|l| l.strip_suffix('\r').unwrap_or(l))
16}
17
18impl fmt::Debug for ChangeKind<'_> {
22 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23 let desc = match self {
24 Unchanged(node) => format!("Unchanged(ID: {})", node.id()),
25 ReplacedComment(lhs_node, rhs_node) | ReplacedString(lhs_node, rhs_node) => {
26 let change_kind = if let ReplacedComment(_, _) = self {
27 "ReplacedComment"
28 } else {
29 "ReplacedString"
30 };
31
32 format!(
33 "{}(lhs ID: {}, rhs ID: {})",
34 change_kind,
35 lhs_node.id(),
36 rhs_node.id()
37 )
38 }
39 Novel => "Novel".to_owned(),
40 };
41 f.write_str(&desc)
42 }
43}
44
45pub type SyntaxId = NonZeroU32;
46
47pub type ContentId = u32;
48
49pub struct SyntaxInfo<'a> {
51 previous_sibling: Cell<Option<&'a Syntax<'a>>>,
53 next_sibling: Cell<Option<&'a Syntax<'a>>>,
55 prev: Cell<Option<&'a Syntax<'a>>>,
58 parent: Cell<Option<&'a Syntax<'a>>>,
60 num_ancestors: Cell<u32>,
62 pub num_after: Cell<usize>,
63 unique_id: Cell<SyntaxId>,
65 content_id: Cell<ContentId>,
71 content_is_unique: Cell<bool>,
74}
75
76impl<'a> SyntaxInfo<'a> {
77 pub fn new() -> Self {
78 Self {
79 previous_sibling: Cell::new(None),
80 next_sibling: Cell::new(None),
81 prev: Cell::new(None),
82 parent: Cell::new(None),
83 num_ancestors: Cell::new(0),
84 num_after: Cell::new(0),
85 unique_id: Cell::new(NonZeroU32::new(u32::MAX).unwrap()),
86 content_id: Cell::new(0),
87 content_is_unique: Cell::new(false),
88 }
89 }
90}
91
92impl Default for SyntaxInfo<'_> {
93 fn default() -> Self {
94 Self::new()
95 }
96}
97
98pub enum Syntax<'a> {
99 List {
100 info: SyntaxInfo<'a>,
101 open_position: Vec<SingleLineSpan>,
102 open_content: String,
103 children: Vec<&'a Syntax<'a>>,
104 close_position: Vec<SingleLineSpan>,
105 close_content: String,
106 num_descendants: u32,
107 },
108 Atom {
109 info: SyntaxInfo<'a>,
110 position: Vec<SingleLineSpan>,
111 content: String,
112 kind: AtomKind,
113 },
114}
115
116fn dbg_pos(pos: &[SingleLineSpan]) -> String {
117 match pos {
118 [] => "-".into(),
119 [pos] => format!("{}:{}-{}", pos.line.0, pos.start_col, pos.end_col),
120 [start, .., end] => format!(
121 "{}:{}-{}:{}",
122 start.line.0, start.start_col, end.line.0, end.end_col
123 ),
124 }
125}
126
127impl<'a> fmt::Debug for Syntax<'a> {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 match self {
130 List {
131 open_content,
132 open_position,
133 children,
134 close_content,
135 close_position,
136 ..
137 } => {
138 let ds = f.debug_struct(&format!(
139 "List id:{} content_id:{}",
140 self.id(),
141 self.content_id()
142 ));
143 let mut ds = ds;
144
145 ds.field("open_content", &open_content)
146 .field("open_position", &dbg_pos(open_position))
147 .field("children", &children)
148 .field("close_content", &close_content)
149 .field("close_position", &dbg_pos(close_position));
150
151 ds.finish()
152 }
153 Atom {
154 content, position, ..
155 } => {
156 let ds = f.debug_struct(&format!(
157 "Atom id:{} content_id:{}",
158 self.id(),
159 self.content_id()
160 ));
161 let mut ds = ds;
162 ds.field("content", &content);
163 ds.field("position", &dbg_pos(position));
164
165 ds.finish()
166 }
167 }
168 }
169}
170
171impl<'a> Syntax<'a> {
172 pub fn new_list(
173 arena: &'a Arena<Self>,
174 open_content: &str,
175 open_position: Vec<SingleLineSpan>,
176 children: Vec<&'a Self>,
177 close_content: &str,
178 close_position: Vec<SingleLineSpan>,
179 ) -> &'a Self {
180 let children = children
184 .into_iter()
185 .filter(|n| match n {
186 List { .. } => true,
187 Atom { content, .. } => !content.is_empty(),
188 })
189 .collect::<Vec<_>>();
190
191 if children.len() == 1 && open_content.is_empty() && close_content.is_empty() {
201 return children[0];
202 }
203
204 let mut num_descendants = 0;
205 for child in &children {
206 num_descendants += match child {
207 List {
208 num_descendants, ..
209 } => *num_descendants + 1,
210 Atom { .. } => 1,
211 };
212 }
213
214 arena.alloc(List {
215 info: SyntaxInfo::default(),
216 open_position,
217 open_content: open_content.into(),
218 close_content: close_content.into(),
219 close_position,
220 children,
221 num_descendants,
222 })
223 }
224
225 pub fn new_atom(
226 arena: &'a Arena<Self>,
227 mut position: Vec<SingleLineSpan>,
228 mut content: String,
229 kind: AtomKind,
230 ) -> &'a Self {
231 if content.ends_with('\r') {
234 content.pop();
235 }
236
237 if content.ends_with('\n') {
242 position.pop();
243 content.pop();
244 }
245
246 arena.alloc(Atom {
247 info: SyntaxInfo::default(),
248 position,
249 content,
250 kind,
251 })
252 }
253
254 pub fn info(&self) -> &SyntaxInfo<'a> {
255 match self {
256 List { info, .. } | Atom { info, .. } => info,
257 }
258 }
259
260 pub fn parent(&self) -> Option<&'a Self> {
261 self.info().parent.get()
262 }
263
264 pub fn next_sibling(&self) -> Option<&'a Self> {
265 self.info().next_sibling.get()
266 }
267
268 pub fn id(&self) -> SyntaxId {
271 self.info().unique_id.get()
272 }
273
274 pub fn content_id(&self) -> ContentId {
278 self.info().content_id.get()
279 }
280
281 pub fn content_is_unique(&self) -> bool {
282 self.info().content_is_unique.get()
283 }
284
285 pub fn num_ancestors(&self) -> u32 {
286 self.info().num_ancestors.get()
287 }
288
289 pub fn dbg_content(&self) -> String {
290 match self {
291 List {
292 open_content,
293 open_position,
294 close_content,
295 ..
296 } => {
297 let line = open_position
298 .first()
299 .map(|p| p.line.display())
300 .unwrap_or_else(|| "?".to_owned());
301
302 format!("line:{} {} ... {}", line, open_content, close_content)
303 }
304 Atom {
305 content, position, ..
306 } => {
307 let line = position
308 .first()
309 .map_or_else(|| "?".to_owned(), |p| p.line.display());
310
311 format!("line:{} {}", line, content)
312 }
313 }
314 }
315}
316
317pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
319 init_info(lhs_roots, rhs_roots);
320 init_next_prev(lhs_roots);
321 init_next_prev(rhs_roots);
322}
323
324fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
325 let mut id = NonZeroU32::new(1).unwrap();
326 init_info_on_side(lhs_roots, &mut id);
327 init_info_on_side(rhs_roots, &mut id);
328
329 let mut existing = DftHashMap::default();
330 set_content_id(lhs_roots, &mut existing);
331 set_content_id(rhs_roots, &mut existing);
332
333 set_content_is_unique(lhs_roots);
334 set_content_is_unique(rhs_roots);
335}
336
337type ContentKey = (Option<String>, Option<String>, Vec<u32>, bool, bool);
338
339fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>) {
340 for node in nodes {
341 let key: ContentKey = match node {
342 List {
343 open_content,
344 close_content,
345 children,
346 ..
347 } => {
348 set_content_id(children, existing);
350
351 let children_content_ids: Vec<_> =
352 children.iter().map(|c| c.info().content_id.get()).collect();
353
354 (
355 Some(open_content.clone()),
356 Some(close_content.clone()),
357 children_content_ids,
358 true,
359 true,
360 )
361 }
362 Atom {
363 content,
364 kind: highlight,
365 ..
366 } => {
367 let is_comment = *highlight == AtomKind::Comment;
368 let clean_content = if is_comment && split_on_newlines(content).count() > 1 {
369 split_on_newlines(content)
370 .map(|l| l.trim_start())
371 .collect::<Vec<_>>()
372 .join("\n")
373 } else {
374 content.clone()
375 };
376 (Some(clean_content), None, vec![], false, is_comment)
377 }
378 };
379
380 let next_id = existing.len() as u32 + 1;
383 let content_id = existing.entry(key).or_insert(next_id);
384 node.info().content_id.set(*content_id);
385 }
386}
387
388fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
389 for (i, node) in nodes.iter().enumerate() {
390 let num_after = parent_num_after + nodes.len() - 1 - i;
391 node.info().num_after.set(num_after);
392
393 if let List { children, .. } = node {
394 set_num_after(children, num_after);
395 }
396 }
397}
398
399pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
400 set_prev_sibling(roots);
401 set_next_sibling(roots);
402 set_prev(roots, None);
403}
404
405fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
408 set_parent(roots, None);
409 set_num_ancestors(roots, 0);
410 set_num_after(roots, 0);
411 set_unique_id(roots, next_id);
412}
413
414fn set_unique_id(nodes: &[&Syntax], next_id: &mut SyntaxId) {
415 for node in nodes {
416 node.info().unique_id.set(*next_id);
417 *next_id = NonZeroU32::new(u32::from(*next_id) + 1)
418 .expect("Should not have more than u32::MAX nodes");
419 if let List { children, .. } = node {
420 set_unique_id(children, next_id);
421 }
422 }
423}
424
425fn find_nodes_with_unique_content(nodes: &[&Syntax], counts: &mut DftHashMap<ContentId, usize>) {
427 for node in nodes {
428 *counts.entry(node.content_id()).or_insert(0) += 1;
429 if let List { children, .. } = node {
430 find_nodes_with_unique_content(children, counts);
431 }
432 }
433}
434
435fn set_content_is_unique_from_counts(nodes: &[&Syntax], counts: &DftHashMap<ContentId, usize>) {
436 for node in nodes {
437 let count = counts
438 .get(&node.content_id())
439 .expect("Count should be present");
440 node.info().content_is_unique.set(*count == 1);
441
442 if let List { children, .. } = node {
443 set_content_is_unique_from_counts(children, counts);
444 }
445 }
446}
447
448fn set_content_is_unique(nodes: &[&Syntax]) {
449 let mut counts = DftHashMap::default();
450 find_nodes_with_unique_content(nodes, &mut counts);
451 set_content_is_unique_from_counts(nodes, &counts);
452}
453
454fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
455 let mut prev = None;
456
457 for node in nodes {
458 node.info().previous_sibling.set(prev);
459 prev = Some(node);
460
461 if let List { children, .. } = node {
462 set_prev_sibling(children);
463 }
464 }
465}
466
467fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
468 for (i, node) in nodes.iter().enumerate() {
469 let sibling = nodes.get(i + 1).copied();
470 node.info().next_sibling.set(sibling);
471
472 if let List { children, .. } = node {
473 set_next_sibling(children);
474 }
475 }
476}
477
478fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
481 for (i, node) in nodes.iter().enumerate() {
482 let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
483
484 node.info().prev.set(node_prev);
485 if let List { children, .. } = node {
486 set_prev(children, Some(node));
487 }
488 }
489}
490
491fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
492 for node in nodes {
493 node.info().parent.set(parent);
494 if let List { children, .. } = node {
495 set_parent(children, Some(node));
496 }
497 }
498}
499
500fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) {
501 for node in nodes {
502 node.info().num_ancestors.set(num_ancestors);
503
504 if let List { children, .. } = node {
505 set_num_ancestors(children, num_ancestors + 1);
506 }
507 }
508}
509
510impl PartialEq for Syntax<'_> {
511 fn eq(&self, other: &Self) -> bool {
512 debug_assert!(self.content_id() > 0);
513 debug_assert!(other.content_id() > 0);
514 self.content_id() == other.content_id()
515 }
516}
517impl<'a> Eq for Syntax<'a> {}
518
519#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
522pub enum StringKind {
523 StringLiteral,
525 Text,
527}
528
529#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
530pub enum AtomKind {
531 Normal,
535 String(StringKind),
536 Type,
537 Comment,
538 Keyword,
539 TreeSitterError,
540}