1#![allow(clippy::mutable_key_type)] use std::{cell::Cell, fmt, hash::Hash, num::NonZeroU32};
6
7use line_numbers::SingleLineSpan;
8use typed_arena::Arena;
9
10use self::Syntax::*;
11use super::{
12 changes::ChangeKind,
13 changes::ChangeKind::*,
14 hash::DftHashMap,
15};
16
17fn split_on_newlines(s: &str) -> impl Iterator<Item = &str> {
19 s.split('\n').map(|l| l.strip_suffix('\r').unwrap_or(l))
20}
21
22impl fmt::Debug for ChangeKind<'_> {
26 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
27 let desc = match self {
28 Unchanged(node) => format!("Unchanged(ID: {})", node.id()),
29 ReplacedComment(lhs_node, rhs_node) | ReplacedString(lhs_node, rhs_node) => {
30 let change_kind = if let ReplacedComment(_, _) = self {
31 "ReplacedComment"
32 } else {
33 "ReplacedString"
34 };
35
36 format!(
37 "{}(lhs ID: {}, rhs ID: {})",
38 change_kind,
39 lhs_node.id(),
40 rhs_node.id()
41 )
42 }
43 Novel => "Novel".to_owned(),
44 };
45 f.write_str(&desc)
46 }
47}
48
49pub type SyntaxId = NonZeroU32;
50
51pub type ContentId = u32;
52
53pub struct SyntaxInfo<'a> {
55 previous_sibling: Cell<Option<&'a Syntax<'a>>>,
57 next_sibling: Cell<Option<&'a Syntax<'a>>>,
59 prev: Cell<Option<&'a Syntax<'a>>>,
62 parent: Cell<Option<&'a Syntax<'a>>>,
64 num_ancestors: Cell<u32>,
66 pub num_after: Cell<usize>,
67 unique_id: Cell<SyntaxId>,
69 content_id: Cell<ContentId>,
75 content_is_unique: Cell<bool>,
78}
79
80impl<'a> SyntaxInfo<'a> {
81 pub fn new() -> Self {
82 Self {
83 previous_sibling: Cell::new(None),
84 next_sibling: Cell::new(None),
85 prev: Cell::new(None),
86 parent: Cell::new(None),
87 num_ancestors: Cell::new(0),
88 num_after: Cell::new(0),
89 unique_id: Cell::new(NonZeroU32::new(u32::MAX).unwrap()),
90 content_id: Cell::new(0),
91 content_is_unique: Cell::new(false),
92 }
93 }
94}
95
96impl Default for SyntaxInfo<'_> {
97 fn default() -> Self {
98 Self::new()
99 }
100}
101
102pub enum Syntax<'a> {
103 List {
104 info: SyntaxInfo<'a>,
105 open_position: Vec<SingleLineSpan>,
106 open_content: String,
107 children: Vec<&'a Syntax<'a>>,
108 close_position: Vec<SingleLineSpan>,
109 close_content: String,
110 num_descendants: u32,
111 },
112 Atom {
113 info: SyntaxInfo<'a>,
114 position: Vec<SingleLineSpan>,
115 content: String,
116 kind: AtomKind,
117 },
118}
119
120fn dbg_pos(pos: &[SingleLineSpan]) -> String {
121 match pos {
122 [] => "-".into(),
123 [pos] => format!("{}:{}-{}", pos.line.0, pos.start_col, pos.end_col),
124 [start, .., end] => format!(
125 "{}:{}-{}:{}",
126 start.line.0, start.start_col, end.line.0, end.end_col
127 ),
128 }
129}
130
131impl<'a> fmt::Debug for Syntax<'a> {
132 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133 match self {
134 List {
135 open_content,
136 open_position,
137 children,
138 close_content,
139 close_position,
140 ..
141 } => {
142 let ds = f.debug_struct(&format!(
143 "List id:{} content_id:{}",
144 self.id(),
145 self.content_id()
146 ));
147 let mut ds = ds;
148
149 ds.field("open_content", &open_content)
150 .field("open_position", &dbg_pos(open_position))
151 .field("children", &children)
152 .field("close_content", &close_content)
153 .field("close_position", &dbg_pos(close_position));
154
155 ds.finish()
156 }
157 Atom {
158 content, position, ..
159 } => {
160 let ds = f.debug_struct(&format!(
161 "Atom id:{} content_id:{}",
162 self.id(),
163 self.content_id()
164 ));
165 let mut ds = ds;
166 ds.field("content", &content);
167 ds.field("position", &dbg_pos(position));
168
169 ds.finish()
170 }
171 }
172 }
173}
174
175impl<'a> Syntax<'a> {
176 pub fn new_list(
177 arena: &'a Arena<Self>,
178 open_content: &str,
179 open_position: Vec<SingleLineSpan>,
180 children: Vec<&'a Self>,
181 close_content: &str,
182 close_position: Vec<SingleLineSpan>,
183 ) -> &'a Self {
184 let children = children
188 .into_iter()
189 .filter(|n| match n {
190 List { .. } => true,
191 Atom { content, .. } => !content.is_empty(),
192 })
193 .collect::<Vec<_>>();
194
195 if children.len() == 1 && open_content.is_empty() && close_content.is_empty() {
205 return children[0];
206 }
207
208 let mut num_descendants = 0;
209 for child in &children {
210 num_descendants += match child {
211 List {
212 num_descendants, ..
213 } => *num_descendants + 1,
214 Atom { .. } => 1,
215 };
216 }
217
218 arena.alloc(List {
219 info: SyntaxInfo::default(),
220 open_position,
221 open_content: open_content.into(),
222 close_content: close_content.into(),
223 close_position,
224 children,
225 num_descendants,
226 })
227 }
228
229 pub fn new_atom(
230 arena: &'a Arena<Self>,
231 mut position: Vec<SingleLineSpan>,
232 mut content: String,
233 kind: AtomKind,
234 ) -> &'a Self {
235 if content.ends_with('\r') {
238 content.pop();
239 }
240
241 if content.ends_with('\n') {
246 position.pop();
247 content.pop();
248 }
249
250 arena.alloc(Atom {
251 info: SyntaxInfo::default(),
252 position,
253 content,
254 kind,
255 })
256 }
257
258 pub fn info(&self) -> &SyntaxInfo<'a> {
259 match self {
260 List { info, .. } | Atom { info, .. } => info,
261 }
262 }
263
264 pub fn parent(&self) -> Option<&'a Self> {
265 self.info().parent.get()
266 }
267
268 pub fn next_sibling(&self) -> Option<&'a Self> {
269 self.info().next_sibling.get()
270 }
271
272 pub fn id(&self) -> SyntaxId {
275 self.info().unique_id.get()
276 }
277
278 pub fn content_id(&self) -> ContentId {
282 self.info().content_id.get()
283 }
284
285 pub fn content_is_unique(&self) -> bool {
286 self.info().content_is_unique.get()
287 }
288
289 pub fn num_ancestors(&self) -> u32 {
290 self.info().num_ancestors.get()
291 }
292
293 pub fn dbg_content(&self) -> String {
294 match self {
295 List {
296 open_content,
297 open_position,
298 close_content,
299 ..
300 } => {
301 let line = open_position
302 .first()
303 .map(|p| p.line.display())
304 .unwrap_or_else(|| "?".to_owned());
305
306 format!("line:{} {} ... {}", line, open_content, close_content)
307 }
308 Atom {
309 content, position, ..
310 } => {
311 let line = position
312 .first()
313 .map_or_else(|| "?".to_owned(), |p| p.line.display());
314
315 format!("line:{} {}", line, content)
316 }
317 }
318 }
319}
320
321pub fn init_all_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
323 init_info(lhs_roots, rhs_roots);
324 init_next_prev(lhs_roots);
325 init_next_prev(rhs_roots);
326}
327
328fn init_info<'a>(lhs_roots: &[&'a Syntax<'a>], rhs_roots: &[&'a Syntax<'a>]) {
329 let mut id = NonZeroU32::new(1).unwrap();
330 init_info_on_side(lhs_roots, &mut id);
331 init_info_on_side(rhs_roots, &mut id);
332
333 let mut existing = DftHashMap::default();
334 set_content_id(lhs_roots, &mut existing);
335 set_content_id(rhs_roots, &mut existing);
336
337 set_content_is_unique(lhs_roots);
338 set_content_is_unique(rhs_roots);
339}
340
341type ContentKey = (Option<String>, Option<String>, Vec<u32>, bool, bool);
342
343fn set_content_id(nodes: &[&Syntax], existing: &mut DftHashMap<ContentKey, u32>) {
344 for node in nodes {
345 let key: ContentKey = match node {
346 List {
347 open_content,
348 close_content,
349 children,
350 ..
351 } => {
352 set_content_id(children, existing);
354
355 let children_content_ids: Vec<_> =
356 children.iter().map(|c| c.info().content_id.get()).collect();
357
358 (
359 Some(open_content.clone()),
360 Some(close_content.clone()),
361 children_content_ids,
362 true,
363 true,
364 )
365 }
366 Atom {
367 content,
368 kind: highlight,
369 ..
370 } => {
371 let is_comment = *highlight == AtomKind::Comment;
372 let clean_content = if is_comment && split_on_newlines(content).count() > 1 {
373 split_on_newlines(content)
374 .map(|l| l.trim_start())
375 .collect::<Vec<_>>()
376 .join("\n")
377 } else {
378 content.clone()
379 };
380 (Some(clean_content), None, vec![], false, is_comment)
381 }
382 };
383
384 let next_id = existing.len() as u32 + 1;
387 let content_id = existing.entry(key).or_insert(next_id);
388 node.info().content_id.set(*content_id);
389 }
390}
391
392fn set_num_after(nodes: &[&Syntax], parent_num_after: usize) {
393 for (i, node) in nodes.iter().enumerate() {
394 let num_after = parent_num_after + nodes.len() - 1 - i;
395 node.info().num_after.set(num_after);
396
397 if let List { children, .. } = node {
398 set_num_after(children, num_after);
399 }
400 }
401}
402
403pub fn init_next_prev<'a>(roots: &[&'a Syntax<'a>]) {
404 set_prev_sibling(roots);
405 set_next_sibling(roots);
406 set_prev(roots, None);
407}
408
409fn init_info_on_side<'a>(roots: &[&'a Syntax<'a>], next_id: &mut SyntaxId) {
412 set_parent(roots, None);
413 set_num_ancestors(roots, 0);
414 set_num_after(roots, 0);
415 set_unique_id(roots, next_id);
416}
417
418fn set_unique_id(nodes: &[&Syntax], next_id: &mut SyntaxId) {
419 for node in nodes {
420 node.info().unique_id.set(*next_id);
421 *next_id = NonZeroU32::new(u32::from(*next_id) + 1)
422 .expect("Should not have more than u32::MAX nodes");
423 if let List { children, .. } = node {
424 set_unique_id(children, next_id);
425 }
426 }
427}
428
429fn find_nodes_with_unique_content(nodes: &[&Syntax], counts: &mut DftHashMap<ContentId, usize>) {
431 for node in nodes {
432 *counts.entry(node.content_id()).or_insert(0) += 1;
433 if let List { children, .. } = node {
434 find_nodes_with_unique_content(children, counts);
435 }
436 }
437}
438
439fn set_content_is_unique_from_counts(nodes: &[&Syntax], counts: &DftHashMap<ContentId, usize>) {
440 for node in nodes {
441 let count = counts
442 .get(&node.content_id())
443 .expect("Count should be present");
444 node.info().content_is_unique.set(*count == 1);
445
446 if let List { children, .. } = node {
447 set_content_is_unique_from_counts(children, counts);
448 }
449 }
450}
451
452fn set_content_is_unique(nodes: &[&Syntax]) {
453 let mut counts = DftHashMap::default();
454 find_nodes_with_unique_content(nodes, &mut counts);
455 set_content_is_unique_from_counts(nodes, &counts);
456}
457
458fn set_prev_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
459 let mut prev = None;
460
461 for node in nodes {
462 node.info().previous_sibling.set(prev);
463 prev = Some(node);
464
465 if let List { children, .. } = node {
466 set_prev_sibling(children);
467 }
468 }
469}
470
471fn set_next_sibling<'a>(nodes: &[&'a Syntax<'a>]) {
472 for (i, node) in nodes.iter().enumerate() {
473 let sibling = nodes.get(i + 1).copied();
474 node.info().next_sibling.set(sibling);
475
476 if let List { children, .. } = node {
477 set_next_sibling(children);
478 }
479 }
480}
481
482fn set_prev<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
485 for (i, node) in nodes.iter().enumerate() {
486 let node_prev = if i == 0 { parent } else { Some(nodes[i - 1]) };
487
488 node.info().prev.set(node_prev);
489 if let List { children, .. } = node {
490 set_prev(children, Some(node));
491 }
492 }
493}
494
495fn set_parent<'a>(nodes: &[&'a Syntax<'a>], parent: Option<&'a Syntax<'a>>) {
496 for node in nodes {
497 node.info().parent.set(parent);
498 if let List { children, .. } = node {
499 set_parent(children, Some(node));
500 }
501 }
502}
503
504fn set_num_ancestors(nodes: &[&Syntax], num_ancestors: u32) {
505 for node in nodes {
506 node.info().num_ancestors.set(num_ancestors);
507
508 if let List { children, .. } = node {
509 set_num_ancestors(children, num_ancestors + 1);
510 }
511 }
512}
513
514impl PartialEq for Syntax<'_> {
515 fn eq(&self, other: &Self) -> bool {
516 debug_assert!(self.content_id() > 0);
517 debug_assert!(other.content_id() > 0);
518 self.content_id() == other.content_id()
519 }
520}
521impl<'a> Eq for Syntax<'a> {}
522
523#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
526pub enum StringKind {
527 StringLiteral,
529 Text,
531}
532
533#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)]
534pub enum AtomKind {
535 Normal,
539 String(StringKind),
540 Type,
541 Comment,
542 Keyword,
543 TreeSitterError,
544}