1use std::collections::BTreeMap;
2use std::fmt;
3
4use crate::configuration::{ParseConfiguration, TriviaAttachmentPolicy};
5use crate::language_parser::{BuiltInLanguageParser, LanguageParser};
6use crate::link_flags::LinkFlags;
7use crate::mixed_regions::{detect_embedded_regions, EmbeddedRegion};
8use crate::query::LinkQuery;
9use crate::source::{ByteRange, Point, SourceSpan};
10use crate::substitution::{SubstitutionReport, SubstitutionRule};
11use crate::tree_sitter_adapter;
12use crate::verification::{VerificationIssue, VerificationIssueKind, VerificationReport};
13
/// Opaque numeric identity of a link.
///
/// Equality, ordering and hashing all follow the wrapped integer, so the id
/// can be used directly as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LinkId(u64);

impl LinkId {
    /// Returns the raw integer wrapped by this id.
    #[must_use]
    pub const fn as_u64(self) -> u64 {
        let Self(raw) = self;
        raw
    }
}

impl fmt::Display for LinkId {
    /// Writes the id as its plain decimal number.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let raw = self.as_u64();
        write!(f, "{raw}")
    }
}
31
/// Classification attached to a link through its metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    /// An n-tuple of references to links.
    Link,
    /// One position in a link that points to another link.
    Reference,
    /// A link that connects references to other links.
    Relation,
    /// A language point.
    Language,
    /// A grammar point.
    Grammar,
    /// A link that constrains or classifies other links.
    Type,
    /// A shared meaning that multiple languages can reference.
    Concept,
    /// A syntax link.
    Syntax,
    /// A labeled relation from a parent link to a child link.
    Field,
    /// Source text preserved by explicit attachment links.
    Trivia,
    /// A source token; the lossless text parser emits one per character.
    Token,
    /// The root link of a parsed source document.
    Document,
    /// A meaning-level link, such as a concept-to-syntax mapping.
    Semantic,
    /// A source span with a selected or detected language.
    Region,
    /// An object identity that other links can share.
    Object,
}
51
/// A view over a network that keeps only some link types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NetworkProjection {
    /// Keeps every link.
    Lossless,
    /// Keeps every link except those typed `Semantic`.
    ConcreteSyntax,
    /// Keeps every link except those typed `Token` or `Trivia`.
    AbstractSyntax,
    /// Keeps only links typed `Semantic`, `Concept`, `Type` or `Language`.
    Semantic,
}
64
65impl NetworkProjection {
66 #[must_use]
68 pub const fn label(self) -> &'static str {
69 match self {
70 Self::Lossless => "lossless",
71 Self::ConcreteSyntax => "concrete syntax",
72 Self::AbstractSyntax => "abstract syntax",
73 Self::Semantic => "semantic",
74 }
75 }
76
77 fn includes(self, link: &Link) -> bool {
78 match self {
79 Self::Lossless => true,
80 Self::ConcreteSyntax => link.metadata().link_type() != Some(LinkType::Semantic),
81 Self::AbstractSyntax => !matches!(
82 link.metadata().link_type(),
83 Some(LinkType::Token | LinkType::Trivia)
84 ),
85 Self::Semantic => matches!(
86 link.metadata().link_type(),
87 Some(LinkType::Semantic | LinkType::Concept | LinkType::Type | LinkType::Language)
88 ),
89 }
90 }
91}
92
93impl fmt::Display for LinkType {
94 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
95 let name = match self {
96 Self::Link => "link",
97 Self::Reference => "reference",
98 Self::Relation => "relation",
99 Self::Language => "language",
100 Self::Grammar => "grammar",
101 Self::Type => "type",
102 Self::Concept => "concept",
103 Self::Syntax => "syntax",
104 Self::Field => "field",
105 Self::Trivia => "trivia",
106 Self::Token => "token",
107 Self::Document => "document",
108 Self::Semantic => "semantic",
109 Self::Region => "region",
110 Self::Object => "object",
111 };
112 formatter.write_str(name)
113 }
114}
115
116#[derive(Clone, Debug, Default, PartialEq, Eq)]
118pub struct LinkMetadata {
119 link_type: Option<LinkType>,
120 named: bool,
121 term: Option<String>,
122 definition: Option<String>,
123 language: Option<String>,
124 span: Option<SourceSpan>,
125 flags: LinkFlags,
126}
127
128impl LinkMetadata {
129 #[must_use]
131 pub fn new() -> Self {
132 Self::default()
133 }
134
135 #[must_use]
137 pub const fn with_link_type(mut self, link_type: LinkType) -> Self {
138 self.link_type = Some(link_type);
139 self
140 }
141
142 #[must_use]
144 pub const fn with_named(mut self, named: bool) -> Self {
145 self.named = named;
146 self
147 }
148
149 #[must_use]
151 pub fn with_term(mut self, term: impl Into<String>) -> Self {
152 self.term = Some(term.into());
153 self
154 }
155
156 #[must_use]
158 pub fn with_definition(mut self, definition: impl Into<String>) -> Self {
159 self.definition = Some(definition.into());
160 self
161 }
162
163 #[must_use]
165 pub fn with_language(mut self, language: impl Into<String>) -> Self {
166 self.language = Some(language.into());
167 self
168 }
169
170 #[must_use]
172 pub const fn with_span(mut self, span: SourceSpan) -> Self {
173 self.span = Some(span);
174 self
175 }
176
177 #[must_use]
179 pub const fn with_flags(mut self, flags: LinkFlags) -> Self {
180 self.flags = flags;
181 self
182 }
183
184 #[must_use]
186 pub const fn link_type(&self) -> Option<LinkType> {
187 self.link_type
188 }
189
190 #[must_use]
192 pub const fn is_named(&self) -> bool {
193 self.named
194 }
195
196 #[must_use]
198 pub fn term(&self) -> Option<&str> {
199 self.term.as_deref()
200 }
201
202 #[must_use]
204 pub fn definition(&self) -> Option<&str> {
205 self.definition.as_deref()
206 }
207
208 #[must_use]
210 pub fn language(&self) -> Option<&str> {
211 self.language.as_deref()
212 }
213
214 #[must_use]
216 pub const fn span(&self) -> Option<SourceSpan> {
217 self.span
218 }
219
220 #[must_use]
222 pub const fn flags(&self) -> LinkFlags {
223 self.flags
224 }
225}
226
227#[derive(Clone, Debug, PartialEq, Eq)]
229pub struct Link {
230 id: LinkId,
231 references: Vec<LinkId>,
232 metadata: LinkMetadata,
233}
234
235impl Link {
236 #[must_use]
238 pub const fn id(&self) -> LinkId {
239 self.id
240 }
241
242 #[must_use]
244 pub fn references(&self) -> &[LinkId] {
245 &self.references
246 }
247
248 #[must_use]
250 pub const fn metadata(&self) -> &LinkMetadata {
251 &self.metadata
252 }
253
254 const fn metadata_mut(&mut self) -> &mut LinkMetadata {
255 &mut self.metadata
256 }
257}
258
259#[derive(Clone, Debug, Default, PartialEq, Eq)]
261pub struct LinkNetwork {
262 next_id: u64,
263 links: BTreeMap<LinkId, Link>,
264 terms: BTreeMap<String, LinkId>,
265 concept_syntax: BTreeMap<(String, String), String>,
266}
267
268impl LinkNetwork {
269 #[must_use]
271 pub const fn new() -> Self {
272 Self {
273 next_id: 1,
274 links: BTreeMap::new(),
275 terms: BTreeMap::new(),
276 concept_syntax: BTreeMap::new(),
277 }
278 }
279
280 #[must_use]
282 pub fn self_describing() -> Self {
283 let mut network = Self::new();
284 network.insert_typed_point(
285 "link",
286 LinkType::Link,
287 Some("A link is an n-tuple of references to links."),
288 );
289 network.insert_typed_point(
290 "reference",
291 LinkType::Reference,
292 Some("A reference is one position in a link that points to another link."),
293 );
294 network.insert_typed_point(
295 "relation link",
296 LinkType::Relation,
297 Some("A relation link connects references to other links and is itself a link."),
298 );
299 network.insert_typed_point(
300 "language",
301 LinkType::Language,
302 Some("A language is a set of grammar, syntax, and semantic links."),
303 );
304 network.insert_typed_point(
305 "grammar",
306 LinkType::Grammar,
307 Some("A grammar describes which syntax links fully match a language."),
308 );
309 network.insert_typed_point(
310 "type",
311 LinkType::Type,
312 Some("A type is a link that constrains or classifies other links."),
313 );
314 network.insert_typed_point(
315 "concept",
316 LinkType::Concept,
317 Some("A concept is a shared meaning link that multiple languages can reference."),
318 );
319 network.insert_typed_point(
320 "point",
321 LinkType::Concept,
322 Some("A point is represented as a self-referential link."),
323 );
324 network.insert_typed_point(
325 "field",
326 LinkType::Field,
327 Some("A field is a labeled relation link from a parent link to a child link."),
328 );
329 network.insert_typed_point(
330 "trivia",
331 LinkType::Trivia,
332 Some("Trivia is source text preserved by explicit attachment links."),
333 );
334 network.insert_typed_point(
335 "region",
336 LinkType::Region,
337 Some("A region is a source span with a selected or detected language."),
338 );
339 network.insert_typed_point(
340 "object",
341 LinkType::Object,
342 Some("An object identity is represented by a link that other links can share."),
343 );
344 network
345 }
346
347 #[must_use]
353 pub fn parse(text: &str, language: &str, configuration: ParseConfiguration) -> Self {
354 BuiltInLanguageParser.parse_source(text, language, configuration)
355 }
356
357 #[must_use]
363 pub fn parse_lossless_text(
364 text: &str,
365 language: &str,
366 configuration: ParseConfiguration,
367 ) -> Self {
368 let (mut network, document) = Self::new_parse_document(text, language);
369
370 let mut row = 0;
371 let mut column = 0;
372 let mut open_parentheses = Vec::new();
373 for (start, character) in text.char_indices() {
374 let start_point = Point::new(row, column);
375 let end = start + character.len_utf8();
376 if character == '\n' {
377 row += 1;
378 column = 0;
379 } else {
380 column += 1;
381 }
382 let end_point = Point::new(row, column);
383 let span = SourceSpan::new(ByteRange::new(start, end), start_point, end_point);
384 let mut metadata = LinkMetadata::new()
385 .with_link_type(LinkType::Token)
386 .with_named(!character.is_whitespace())
387 .with_term(character.to_string())
388 .with_language(language)
389 .with_span(span);
390
391 if character.is_whitespace() {
392 metadata = metadata.with_flags(LinkFlags::extra());
393 }
394
395 let token = network.insert_link([document], metadata);
396 match character {
397 '(' => open_parentheses.push(token),
398 ')' if open_parentheses.pop().is_none() => {
399 network.set_flags(token, LinkFlags::error());
400 }
401 _ => {}
402 }
403 if character.is_whitespace() {
404 network.attach_trivia(
405 document,
406 token,
407 span,
408 configuration.trivia_attachment_policy(),
409 );
410 }
411 }
412
413 let missing_span = SourceSpan::new(
414 ByteRange::new(text.len(), text.len()),
415 end_point_for_text(text),
416 end_point_for_text(text),
417 );
418 for open_parenthesis in open_parentheses {
419 network.set_flags(open_parenthesis, LinkFlags::containing_error());
420 network.insert_link(
421 [document],
422 LinkMetadata::new()
423 .with_link_type(LinkType::Token)
424 .with_named(false)
425 .with_term(")")
426 .with_language(language)
427 .with_span(missing_span)
428 .with_flags(LinkFlags::missing()),
429 );
430 }
431
432 network.attach_embedded_regions(document, text, language, configuration);
433
434 network
435 }
436
437 pub(crate) fn new_parse_document(text: &str, language: &str) -> (Self, LinkId) {
438 let mut network = Self::self_describing();
439 let language_link = network.insert_typed_point(language, LinkType::Language, None);
440 let document_span = SourceSpan::new(
441 ByteRange::new(0, text.len()),
442 Point::new(0, 0),
443 end_point_for_text(text),
444 );
445 let document = network.insert_link(
446 [language_link],
447 LinkMetadata::new()
448 .with_link_type(LinkType::Document)
449 .with_named(true)
450 .with_term(format!("{language} document"))
451 .with_language(language)
452 .with_span(document_span),
453 );
454 (network, document)
455 }
456
457 #[must_use]
459 pub fn len(&self) -> usize {
460 self.links.len()
461 }
462
463 #[must_use]
465 pub fn is_empty(&self) -> bool {
466 self.links.is_empty()
467 }
468
469 pub fn links(&self) -> impl Iterator<Item = &Link> {
471 self.links.values()
472 }
473
474 pub fn projected_links(&self, projection: NetworkProjection) -> impl Iterator<Item = &Link> {
476 self.links().filter(move |link| projection.includes(link))
477 }
478
479 #[must_use]
481 pub fn reconstruct_text(&self) -> String {
482 let mut tokens = self
483 .links()
484 .filter(|link| link.metadata().link_type() == Some(LinkType::Token))
485 .filter(|link| !link.metadata().flags().is_missing())
486 .filter_map(|link| {
487 Some((
488 link.metadata().span()?.byte_range(),
489 link.id().as_u64(),
490 link.metadata().term()?.to_string(),
491 ))
492 })
493 .collect::<Vec<_>>();
494
495 tokens.sort_by_key(|(range, id, _term)| (range.start(), *id));
496 let mut reconstructed = String::new();
497 let mut covered_until = 0;
498 for (range, _id, term) in tokens {
499 if range.start() < covered_until {
500 continue;
501 }
502 reconstructed.push_str(&term);
503 covered_until = range.end();
504 }
505 reconstructed
506 }
507
508 #[must_use]
510 pub fn embedded_regions(&self) -> Vec<EmbeddedRegion> {
511 self.links()
512 .filter(|link| link.metadata().link_type() == Some(LinkType::Region))
513 .filter_map(|link| {
514 Some(EmbeddedRegion::new(
515 link.metadata().language()?.to_string(),
516 link.metadata().span()?,
517 ))
518 })
519 .collect()
520 }
521
522 #[must_use]
524 pub fn query_links(&self, query: &LinkQuery) -> Vec<&Link> {
525 self.links().filter(|link| query.matches(link)).collect()
526 }
527
528 pub fn insert_point(&mut self, term: &str) -> LinkId {
530 self.insert_typed_point(term, LinkType::Concept, None)
531 }
532
533 pub fn insert_object(&mut self, term: &str) -> LinkId {
535 self.insert_typed_point(term, LinkType::Object, None)
536 }
537
538 pub fn insert_relation<const N: usize>(
540 &mut self,
541 references: [LinkId; N],
542 link_type: LinkType,
543 span: SourceSpan,
544 ) -> LinkId {
545 self.insert_link(
546 references,
547 LinkMetadata::new()
548 .with_link_type(link_type)
549 .with_span(span),
550 )
551 }
552
553 pub fn insert_field(&mut self, parent: LinkId, label: &str, child: LinkId) -> LinkId {
555 let label_link = self.insert_typed_point(
556 label,
557 LinkType::Field,
558 Some("A field label names a relation between links."),
559 );
560 self.insert_link(
561 [parent, label_link, child],
562 LinkMetadata::new().with_link_type(LinkType::Field),
563 )
564 }
565
566 pub fn insert_link<const N: usize>(
568 &mut self,
569 references: [LinkId; N],
570 metadata: LinkMetadata,
571 ) -> LinkId {
572 let id = self.allocate_id();
573 self.links.insert(
574 id,
575 Link {
576 id,
577 references: references.to_vec(),
578 metadata,
579 },
580 );
581 id
582 }
583
584 pub fn insert_concept_mapping(
586 &mut self,
587 concept: &str,
588 language: &str,
589 syntax: &str,
590 ) -> LinkId {
591 let concept_link = self.insert_typed_point(
592 concept,
593 LinkType::Concept,
594 Some("A concept mapping connects shared meaning to language syntax."),
595 );
596 let language_link = self.insert_typed_point(language, LinkType::Language, None);
597 let mapping = self.insert_link(
598 [concept_link, language_link],
599 LinkMetadata::new()
600 .with_link_type(LinkType::Semantic)
601 .with_named(true)
602 .with_term(syntax)
603 .with_language(language),
604 );
605 self.concept_syntax.insert(
606 (concept.to_string(), language.to_string()),
607 syntax.to_string(),
608 );
609 mapping
610 }
611
612 #[must_use]
614 pub fn reconstruct_concept(&self, concept: &str, language: &str) -> Option<&str> {
615 self.concept_syntax
616 .get(&(concept.to_string(), language.to_string()))
617 .map(String::as_str)
618 }
619
620 pub fn apply_substitution(&mut self, rule: &SubstitutionRule) -> SubstitutionReport {
622 let mut report = SubstitutionReport::default();
623
624 if rule.pattern().is_empty() {
625 if !rule.replacement().is_empty() {
626 let created = self.insert_dynamic_link(
627 rule.replacement(),
628 LinkMetadata::new().with_link_type(LinkType::Relation),
629 );
630 report.created.push(created);
631 }
632 return report;
633 }
634
635 let matched = self
636 .links()
637 .filter(|link| link.references() == rule.pattern())
638 .map(Link::id)
639 .collect::<Vec<_>>();
640
641 if rule.replacement().is_empty() {
642 for id in matched {
643 if self.links.remove(&id).is_some() {
644 report.deleted.push(id);
645 }
646 }
647 return report;
648 }
649
650 for id in matched {
651 if let Some(link) = self.links.get_mut(&id) {
652 link.references = rule.replacement().to_vec();
653 report.updated.push(id);
654 }
655 }
656
657 report
658 }
659
660 #[must_use]
662 pub fn link(&self, id: LinkId) -> Option<&Link> {
663 self.links.get(&id)
664 }
665
666 #[must_use]
668 pub fn find_term(&self, term: &str) -> Option<LinkId> {
669 self.terms.get(term).copied()
670 }
671
672 #[must_use]
674 pub fn definition_for(&self, id: LinkId) -> Option<&str> {
675 self.link(id).and_then(|link| link.metadata().definition())
676 }
677
678 pub fn set_span(&mut self, id: LinkId, span: SourceSpan) -> bool {
680 let Some(link) = self.links.get_mut(&id) else {
681 return false;
682 };
683 link.metadata_mut().span = Some(span);
684 true
685 }
686
687 pub fn set_flags(&mut self, id: LinkId, flags: LinkFlags) -> bool {
689 let Some(link) = self.links.get_mut(&id) else {
690 return false;
691 };
692 link.metadata_mut().flags = flags;
693 true
694 }
695
696 #[must_use]
698 pub fn verify_full_match(&self, region: Option<ByteRange>) -> VerificationReport {
699 let issues = self
700 .links
701 .values()
702 .filter(|link| link_is_in_region(link, region))
703 .filter_map(|link| {
704 let flags = link.metadata().flags();
705 let kind = if flags.is_error() {
706 VerificationIssueKind::ErrorLink
707 } else if flags.is_missing() {
708 VerificationIssueKind::MissingLink
709 } else if flags.has_error() {
710 VerificationIssueKind::HasErrorLink
711 } else {
712 return None;
713 };
714
715 Some(VerificationIssue::new(
716 link.id(),
717 kind,
718 link.metadata().span(),
719 ))
720 })
721 .collect();
722 VerificationReport::new(issues)
723 }
724
725 fn insert_typed_point(
726 &mut self,
727 term: &str,
728 link_type: LinkType,
729 definition: Option<&str>,
730 ) -> LinkId {
731 if let Some(id) = self.terms.get(term).copied() {
732 if let Some(definition) = definition {
733 if let Some(link) = self.links.get_mut(&id) {
734 link.metadata_mut().definition = Some(definition.to_string());
735 }
736 }
737 return id;
738 }
739
740 let id = self.allocate_id();
741 let mut metadata = LinkMetadata::new()
742 .with_link_type(link_type)
743 .with_named(true)
744 .with_term(term);
745 if let Some(definition) = definition {
746 metadata = metadata.with_definition(definition);
747 }
748 self.links.insert(
749 id,
750 Link {
751 id,
752 references: vec![id],
753 metadata,
754 },
755 );
756 self.terms.insert(term.to_string(), id);
757 id
758 }
759
760 pub(crate) fn attach_trivia(
761 &mut self,
762 document: LinkId,
763 token: LinkId,
764 span: SourceSpan,
765 policy: TriviaAttachmentPolicy,
766 ) {
767 match policy {
768 TriviaAttachmentPolicy::ContainmentLink => {
769 self.insert_containment_trivia(document, token, span);
770 }
771 TriviaAttachmentPolicy::TokenLink => {
772 self.insert_token_trivia(token, span);
773 }
774 TriviaAttachmentPolicy::Both => {
775 self.insert_containment_trivia(document, token, span);
776 self.insert_token_trivia(token, span);
777 }
778 }
779 }
780
781 fn insert_containment_trivia(&mut self, document: LinkId, token: LinkId, span: SourceSpan) {
782 self.insert_link(
783 [document, token],
784 LinkMetadata::new()
785 .with_link_type(LinkType::Trivia)
786 .with_term("containment trivia")
787 .with_span(span)
788 .with_flags(LinkFlags::extra()),
789 );
790 }
791
792 fn insert_token_trivia(&mut self, token: LinkId, span: SourceSpan) {
793 self.insert_link(
794 [token],
795 LinkMetadata::new()
796 .with_link_type(LinkType::Trivia)
797 .with_term("token trivia")
798 .with_span(span)
799 .with_flags(LinkFlags::extra()),
800 );
801 }
802
803 fn insert_dynamic_link(&mut self, references: &[LinkId], metadata: LinkMetadata) -> LinkId {
804 let id = self.allocate_id();
805 self.links.insert(
806 id,
807 Link {
808 id,
809 references: references.to_vec(),
810 metadata,
811 },
812 );
813 id
814 }
815
816 pub(crate) fn attach_embedded_regions(
817 &mut self,
818 document: LinkId,
819 text: &str,
820 language: &str,
821 configuration: ParseConfiguration,
822 ) {
823 let policy = configuration.region_detection_policy();
824 for region in detect_embedded_regions(text, language, policy) {
825 let region_language = region.language().to_string();
826 let language_link = self.insert_typed_point(®ion_language, LinkType::Language, None);
827 let region_link = self.insert_link(
828 [document, language_link],
829 LinkMetadata::new()
830 .with_link_type(LinkType::Region)
831 .with_named(true)
832 .with_term(format!("{region_language} region"))
833 .with_language(region_language)
834 .with_span(region.span()),
835 );
836 let range = region.span().byte_range();
837 let region_text = &text[range.start()..range.end()];
838 let _ = tree_sitter_adapter::parse_embedded_region_into(
839 self,
840 region_link,
841 region_text,
842 region.language(),
843 region.span(),
844 configuration,
845 );
846 }
847 }
848
849 const fn allocate_id(&mut self) -> LinkId {
850 let id = LinkId(self.next_id);
851 self.next_id += 1;
852 id
853 }
854}
855
856fn link_is_in_region(link: &Link, region: Option<ByteRange>) -> bool {
857 let Some(region) = region else {
858 return true;
859 };
860 link.metadata()
861 .span()
862 .is_some_and(|span| span.byte_range().intersects(region))
863}
864
865fn end_point_for_text(text: &str) -> Point {
866 let mut row = 0;
867 let mut column = 0;
868 for character in text.chars() {
869 if character == '\n' {
870 row += 1;
871 column = 0;
872 } else {
873 column += 1;
874 }
875 }
876 Point::new(row, column)
877}