1use std::collections::BTreeMap;
2use std::fmt;
3
4use crate::configuration::{ParseConfiguration, RegionDetectionPolicy, TriviaAttachmentPolicy};
5use crate::language_parser::{BuiltInLanguageParser, LanguageParser};
6use crate::link_flags::LinkFlags;
7use crate::mixed_regions::{detect_embedded_regions, EmbeddedRegion};
8use crate::query::LinkQuery;
9use crate::source::{ByteRange, Point, SourceSpan};
10use crate::substitution::{SubstitutionReport, SubstitutionRule};
11use crate::verification::{VerificationIssue, VerificationIssueKind, VerificationReport};
12
/// Identifier of a link inside a [`LinkNetwork`]; a newtype over `u64`.
///
/// Identifiers are ordered and hashable, so they can key maps and sets.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LinkId(u64);

impl LinkId {
    /// Returns the raw numeric value of this identifier.
    #[must_use]
    pub const fn as_u64(self) -> u64 {
        let Self(raw) = self;
        raw
    }
}

impl fmt::Display for LinkId {
    /// Writes the raw numeric value in decimal.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(raw) = self;
        formatter.write_fmt(format_args!("{}", raw))
    }
}
30
/// Classification tag that can be stored in a link's [`LinkMetadata`].
///
/// The `Display` implementation renders each variant as its lowercase name.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LinkType {
    /// Tag of the seeded `"link"` point.
    Link,
    /// Tag of the seeded `"reference"` point.
    Reference,
    /// Tag of the seeded `"relation link"` point and of links created by an
    /// empty-pattern substitution.
    Relation,
    /// Tag of language points.
    Language,
    /// Tag of the seeded `"grammar"` point.
    Grammar,
    /// Tag of the seeded `"type"` point.
    Type,
    /// Tag of concept points, including those created by `insert_point`.
    Concept,
    /// Syntax classification.
    Syntax,
    /// Tag of field label points and of field relation links.
    Field,
    /// Tag of trivia attachment links.
    Trivia,
    /// Tag of the per-character links produced by `parse_lossless_text`.
    Token,
    /// Tag of the document link created for parsed text.
    Document,
    /// Tag of concept-to-language mapping links.
    Semantic,
    /// Tag of embedded-region links.
    Region,
    /// Tag of points created by `insert_object`.
    Object,
}
50
51#[derive(Clone, Copy, Debug, PartialEq, Eq)]
53pub enum NetworkProjection {
54 Lossless,
56 ConcreteSyntax,
58 AbstractSyntax,
60 Semantic,
62}
63
64impl NetworkProjection {
65 #[must_use]
67 pub const fn label(self) -> &'static str {
68 match self {
69 Self::Lossless => "lossless",
70 Self::ConcreteSyntax => "concrete syntax",
71 Self::AbstractSyntax => "abstract syntax",
72 Self::Semantic => "semantic",
73 }
74 }
75
76 fn includes(self, link: &Link) -> bool {
77 match self {
78 Self::Lossless => true,
79 Self::ConcreteSyntax => link.metadata().link_type() != Some(LinkType::Semantic),
80 Self::AbstractSyntax => !matches!(
81 link.metadata().link_type(),
82 Some(LinkType::Token | LinkType::Trivia)
83 ),
84 Self::Semantic => matches!(
85 link.metadata().link_type(),
86 Some(LinkType::Semantic | LinkType::Concept | LinkType::Type | LinkType::Language)
87 ),
88 }
89 }
90}
91
92impl fmt::Display for LinkType {
93 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
94 let name = match self {
95 Self::Link => "link",
96 Self::Reference => "reference",
97 Self::Relation => "relation",
98 Self::Language => "language",
99 Self::Grammar => "grammar",
100 Self::Type => "type",
101 Self::Concept => "concept",
102 Self::Syntax => "syntax",
103 Self::Field => "field",
104 Self::Trivia => "trivia",
105 Self::Token => "token",
106 Self::Document => "document",
107 Self::Semantic => "semantic",
108 Self::Region => "region",
109 Self::Object => "object",
110 };
111 formatter.write_str(name)
112 }
113}
114
115#[derive(Clone, Debug, Default, PartialEq, Eq)]
117pub struct LinkMetadata {
118 link_type: Option<LinkType>,
119 named: bool,
120 term: Option<String>,
121 definition: Option<String>,
122 language: Option<String>,
123 span: Option<SourceSpan>,
124 flags: LinkFlags,
125}
126
127impl LinkMetadata {
128 #[must_use]
130 pub fn new() -> Self {
131 Self::default()
132 }
133
134 #[must_use]
136 pub const fn with_link_type(mut self, link_type: LinkType) -> Self {
137 self.link_type = Some(link_type);
138 self
139 }
140
141 #[must_use]
143 pub const fn with_named(mut self, named: bool) -> Self {
144 self.named = named;
145 self
146 }
147
148 #[must_use]
150 pub fn with_term(mut self, term: impl Into<String>) -> Self {
151 self.term = Some(term.into());
152 self
153 }
154
155 #[must_use]
157 pub fn with_definition(mut self, definition: impl Into<String>) -> Self {
158 self.definition = Some(definition.into());
159 self
160 }
161
162 #[must_use]
164 pub fn with_language(mut self, language: impl Into<String>) -> Self {
165 self.language = Some(language.into());
166 self
167 }
168
169 #[must_use]
171 pub const fn with_span(mut self, span: SourceSpan) -> Self {
172 self.span = Some(span);
173 self
174 }
175
176 #[must_use]
178 pub const fn with_flags(mut self, flags: LinkFlags) -> Self {
179 self.flags = flags;
180 self
181 }
182
183 #[must_use]
185 pub const fn link_type(&self) -> Option<LinkType> {
186 self.link_type
187 }
188
189 #[must_use]
191 pub const fn is_named(&self) -> bool {
192 self.named
193 }
194
195 #[must_use]
197 pub fn term(&self) -> Option<&str> {
198 self.term.as_deref()
199 }
200
201 #[must_use]
203 pub fn definition(&self) -> Option<&str> {
204 self.definition.as_deref()
205 }
206
207 #[must_use]
209 pub fn language(&self) -> Option<&str> {
210 self.language.as_deref()
211 }
212
213 #[must_use]
215 pub const fn span(&self) -> Option<SourceSpan> {
216 self.span
217 }
218
219 #[must_use]
221 pub const fn flags(&self) -> LinkFlags {
222 self.flags
223 }
224}
225
226#[derive(Clone, Debug, PartialEq, Eq)]
228pub struct Link {
229 id: LinkId,
230 references: Vec<LinkId>,
231 metadata: LinkMetadata,
232}
233
234impl Link {
235 #[must_use]
237 pub const fn id(&self) -> LinkId {
238 self.id
239 }
240
241 #[must_use]
243 pub fn references(&self) -> &[LinkId] {
244 &self.references
245 }
246
247 #[must_use]
249 pub const fn metadata(&self) -> &LinkMetadata {
250 &self.metadata
251 }
252
253 const fn metadata_mut(&mut self) -> &mut LinkMetadata {
254 &mut self.metadata
255 }
256}
257
258#[derive(Clone, Debug, Default, PartialEq, Eq)]
260pub struct LinkNetwork {
261 next_id: u64,
262 links: BTreeMap<LinkId, Link>,
263 terms: BTreeMap<String, LinkId>,
264 concept_syntax: BTreeMap<(String, String), String>,
265}
266
267impl LinkNetwork {
268 #[must_use]
270 pub const fn new() -> Self {
271 Self {
272 next_id: 1,
273 links: BTreeMap::new(),
274 terms: BTreeMap::new(),
275 concept_syntax: BTreeMap::new(),
276 }
277 }
278
279 #[must_use]
281 pub fn self_describing() -> Self {
282 let mut network = Self::new();
283 network.insert_typed_point(
284 "link",
285 LinkType::Link,
286 Some("A link is an n-tuple of references to links."),
287 );
288 network.insert_typed_point(
289 "reference",
290 LinkType::Reference,
291 Some("A reference is one position in a link that points to another link."),
292 );
293 network.insert_typed_point(
294 "relation link",
295 LinkType::Relation,
296 Some("A relation link connects references to other links and is itself a link."),
297 );
298 network.insert_typed_point(
299 "language",
300 LinkType::Language,
301 Some("A language is a set of grammar, syntax, and semantic links."),
302 );
303 network.insert_typed_point(
304 "grammar",
305 LinkType::Grammar,
306 Some("A grammar describes which syntax links fully match a language."),
307 );
308 network.insert_typed_point(
309 "type",
310 LinkType::Type,
311 Some("A type is a link that constrains or classifies other links."),
312 );
313 network.insert_typed_point(
314 "concept",
315 LinkType::Concept,
316 Some("A concept is a shared meaning link that multiple languages can reference."),
317 );
318 network.insert_typed_point(
319 "point",
320 LinkType::Concept,
321 Some("A point is represented as a self-referential link."),
322 );
323 network.insert_typed_point(
324 "field",
325 LinkType::Field,
326 Some("A field is a labeled relation link from a parent link to a child link."),
327 );
328 network.insert_typed_point(
329 "trivia",
330 LinkType::Trivia,
331 Some("Trivia is source text preserved by explicit attachment links."),
332 );
333 network.insert_typed_point(
334 "region",
335 LinkType::Region,
336 Some("A region is a source span with a selected or detected language."),
337 );
338 network.insert_typed_point(
339 "object",
340 LinkType::Object,
341 Some("An object identity is represented by a link that other links can share."),
342 );
343 network
344 }
345
346 #[must_use]
352 pub fn parse(text: &str, language: &str, configuration: ParseConfiguration) -> Self {
353 BuiltInLanguageParser.parse_source(text, language, configuration)
354 }
355
356 #[must_use]
362 pub fn parse_lossless_text(
363 text: &str,
364 language: &str,
365 configuration: ParseConfiguration,
366 ) -> Self {
367 let (mut network, document) = Self::new_parse_document(text, language);
368
369 let mut row = 0;
370 let mut column = 0;
371 let mut open_parentheses = Vec::new();
372 for (start, character) in text.char_indices() {
373 let start_point = Point::new(row, column);
374 let end = start + character.len_utf8();
375 if character == '\n' {
376 row += 1;
377 column = 0;
378 } else {
379 column += 1;
380 }
381 let end_point = Point::new(row, column);
382 let span = SourceSpan::new(ByteRange::new(start, end), start_point, end_point);
383 let mut metadata = LinkMetadata::new()
384 .with_link_type(LinkType::Token)
385 .with_named(!character.is_whitespace())
386 .with_term(character.to_string())
387 .with_language(language)
388 .with_span(span);
389
390 if character.is_whitespace() {
391 metadata = metadata.with_flags(LinkFlags::extra());
392 }
393
394 let token = network.insert_link([document], metadata);
395 match character {
396 '(' => open_parentheses.push(token),
397 ')' if open_parentheses.pop().is_none() => {
398 network.set_flags(token, LinkFlags::error());
399 }
400 _ => {}
401 }
402 if character.is_whitespace() {
403 network.attach_trivia(
404 document,
405 token,
406 span,
407 configuration.trivia_attachment_policy(),
408 );
409 }
410 }
411
412 let missing_span = SourceSpan::new(
413 ByteRange::new(text.len(), text.len()),
414 end_point_for_text(text),
415 end_point_for_text(text),
416 );
417 for open_parenthesis in open_parentheses {
418 network.set_flags(open_parenthesis, LinkFlags::containing_error());
419 network.insert_link(
420 [document],
421 LinkMetadata::new()
422 .with_link_type(LinkType::Token)
423 .with_named(false)
424 .with_term(")")
425 .with_language(language)
426 .with_span(missing_span)
427 .with_flags(LinkFlags::missing()),
428 );
429 }
430
431 network.attach_embedded_regions(
432 document,
433 text,
434 language,
435 configuration.region_detection_policy(),
436 );
437
438 network
439 }
440
441 pub(crate) fn new_parse_document(text: &str, language: &str) -> (Self, LinkId) {
442 let mut network = Self::self_describing();
443 let language_link = network.insert_typed_point(language, LinkType::Language, None);
444 let document_span = SourceSpan::new(
445 ByteRange::new(0, text.len()),
446 Point::new(0, 0),
447 end_point_for_text(text),
448 );
449 let document = network.insert_link(
450 [language_link],
451 LinkMetadata::new()
452 .with_link_type(LinkType::Document)
453 .with_named(true)
454 .with_term(format!("{language} document"))
455 .with_language(language)
456 .with_span(document_span),
457 );
458 (network, document)
459 }
460
461 #[must_use]
463 pub fn len(&self) -> usize {
464 self.links.len()
465 }
466
467 #[must_use]
469 pub fn is_empty(&self) -> bool {
470 self.links.is_empty()
471 }
472
473 pub fn links(&self) -> impl Iterator<Item = &Link> {
475 self.links.values()
476 }
477
478 pub fn projected_links(&self, projection: NetworkProjection) -> impl Iterator<Item = &Link> {
480 self.links().filter(move |link| projection.includes(link))
481 }
482
483 #[must_use]
485 pub fn reconstruct_text(&self) -> String {
486 let mut tokens = self
487 .links()
488 .filter(|link| link.metadata().link_type() == Some(LinkType::Token))
489 .filter(|link| !link.metadata().flags().is_missing())
490 .filter_map(|link| {
491 Some((
492 link.metadata().span()?.byte_range().start(),
493 link.id().as_u64(),
494 link.metadata().term()?.to_string(),
495 ))
496 })
497 .collect::<Vec<_>>();
498
499 tokens.sort_by_key(|(start, id, _term)| (*start, *id));
500 tokens.into_iter().map(|(_start, _id, term)| term).collect()
501 }
502
503 #[must_use]
505 pub fn embedded_regions(&self) -> Vec<EmbeddedRegion> {
506 self.links()
507 .filter(|link| link.metadata().link_type() == Some(LinkType::Region))
508 .filter_map(|link| {
509 Some(EmbeddedRegion::new(
510 link.metadata().language()?.to_string(),
511 link.metadata().span()?,
512 ))
513 })
514 .collect()
515 }
516
517 #[must_use]
519 pub fn query_links(&self, query: &LinkQuery) -> Vec<&Link> {
520 self.links().filter(|link| query.matches(link)).collect()
521 }
522
523 pub fn insert_point(&mut self, term: &str) -> LinkId {
525 self.insert_typed_point(term, LinkType::Concept, None)
526 }
527
528 pub fn insert_object(&mut self, term: &str) -> LinkId {
530 self.insert_typed_point(term, LinkType::Object, None)
531 }
532
533 pub fn insert_relation<const N: usize>(
535 &mut self,
536 references: [LinkId; N],
537 link_type: LinkType,
538 span: SourceSpan,
539 ) -> LinkId {
540 self.insert_link(
541 references,
542 LinkMetadata::new()
543 .with_link_type(link_type)
544 .with_span(span),
545 )
546 }
547
548 pub fn insert_field(&mut self, parent: LinkId, label: &str, child: LinkId) -> LinkId {
550 let label_link = self.insert_typed_point(
551 label,
552 LinkType::Field,
553 Some("A field label names a relation between links."),
554 );
555 self.insert_link(
556 [parent, label_link, child],
557 LinkMetadata::new().with_link_type(LinkType::Field),
558 )
559 }
560
561 pub fn insert_link<const N: usize>(
563 &mut self,
564 references: [LinkId; N],
565 metadata: LinkMetadata,
566 ) -> LinkId {
567 let id = self.allocate_id();
568 self.links.insert(
569 id,
570 Link {
571 id,
572 references: references.to_vec(),
573 metadata,
574 },
575 );
576 id
577 }
578
579 pub fn insert_concept_mapping(
581 &mut self,
582 concept: &str,
583 language: &str,
584 syntax: &str,
585 ) -> LinkId {
586 let concept_link = self.insert_typed_point(
587 concept,
588 LinkType::Concept,
589 Some("A concept mapping connects shared meaning to language syntax."),
590 );
591 let language_link = self.insert_typed_point(language, LinkType::Language, None);
592 let mapping = self.insert_link(
593 [concept_link, language_link],
594 LinkMetadata::new()
595 .with_link_type(LinkType::Semantic)
596 .with_named(true)
597 .with_term(syntax)
598 .with_language(language),
599 );
600 self.concept_syntax.insert(
601 (concept.to_string(), language.to_string()),
602 syntax.to_string(),
603 );
604 mapping
605 }
606
607 #[must_use]
609 pub fn reconstruct_concept(&self, concept: &str, language: &str) -> Option<&str> {
610 self.concept_syntax
611 .get(&(concept.to_string(), language.to_string()))
612 .map(String::as_str)
613 }
614
615 pub fn apply_substitution(&mut self, rule: &SubstitutionRule) -> SubstitutionReport {
617 let mut report = SubstitutionReport::default();
618
619 if rule.pattern().is_empty() {
620 if !rule.replacement().is_empty() {
621 let created = self.insert_dynamic_link(
622 rule.replacement(),
623 LinkMetadata::new().with_link_type(LinkType::Relation),
624 );
625 report.created.push(created);
626 }
627 return report;
628 }
629
630 let matched = self
631 .links()
632 .filter(|link| link.references() == rule.pattern())
633 .map(Link::id)
634 .collect::<Vec<_>>();
635
636 if rule.replacement().is_empty() {
637 for id in matched {
638 if self.links.remove(&id).is_some() {
639 report.deleted.push(id);
640 }
641 }
642 return report;
643 }
644
645 for id in matched {
646 if let Some(link) = self.links.get_mut(&id) {
647 link.references = rule.replacement().to_vec();
648 report.updated.push(id);
649 }
650 }
651
652 report
653 }
654
655 #[must_use]
657 pub fn link(&self, id: LinkId) -> Option<&Link> {
658 self.links.get(&id)
659 }
660
661 #[must_use]
663 pub fn find_term(&self, term: &str) -> Option<LinkId> {
664 self.terms.get(term).copied()
665 }
666
667 #[must_use]
669 pub fn definition_for(&self, id: LinkId) -> Option<&str> {
670 self.link(id).and_then(|link| link.metadata().definition())
671 }
672
673 pub fn set_span(&mut self, id: LinkId, span: SourceSpan) -> bool {
675 let Some(link) = self.links.get_mut(&id) else {
676 return false;
677 };
678 link.metadata_mut().span = Some(span);
679 true
680 }
681
682 pub fn set_flags(&mut self, id: LinkId, flags: LinkFlags) -> bool {
684 let Some(link) = self.links.get_mut(&id) else {
685 return false;
686 };
687 link.metadata_mut().flags = flags;
688 true
689 }
690
691 #[must_use]
693 pub fn verify_full_match(&self, region: Option<ByteRange>) -> VerificationReport {
694 let issues = self
695 .links
696 .values()
697 .filter(|link| link_is_in_region(link, region))
698 .filter_map(|link| {
699 let flags = link.metadata().flags();
700 let kind = if flags.is_error() {
701 VerificationIssueKind::ErrorLink
702 } else if flags.is_missing() {
703 VerificationIssueKind::MissingLink
704 } else if flags.has_error() {
705 VerificationIssueKind::HasErrorLink
706 } else {
707 return None;
708 };
709
710 Some(VerificationIssue::new(
711 link.id(),
712 kind,
713 link.metadata().span(),
714 ))
715 })
716 .collect();
717 VerificationReport::new(issues)
718 }
719
720 fn insert_typed_point(
721 &mut self,
722 term: &str,
723 link_type: LinkType,
724 definition: Option<&str>,
725 ) -> LinkId {
726 if let Some(id) = self.terms.get(term).copied() {
727 if let Some(definition) = definition {
728 if let Some(link) = self.links.get_mut(&id) {
729 link.metadata_mut().definition = Some(definition.to_string());
730 }
731 }
732 return id;
733 }
734
735 let id = self.allocate_id();
736 let mut metadata = LinkMetadata::new()
737 .with_link_type(link_type)
738 .with_named(true)
739 .with_term(term);
740 if let Some(definition) = definition {
741 metadata = metadata.with_definition(definition);
742 }
743 self.links.insert(
744 id,
745 Link {
746 id,
747 references: vec![id],
748 metadata,
749 },
750 );
751 self.terms.insert(term.to_string(), id);
752 id
753 }
754
755 pub(crate) fn attach_trivia(
756 &mut self,
757 document: LinkId,
758 token: LinkId,
759 span: SourceSpan,
760 policy: TriviaAttachmentPolicy,
761 ) {
762 match policy {
763 TriviaAttachmentPolicy::ContainmentLink => {
764 self.insert_containment_trivia(document, token, span);
765 }
766 TriviaAttachmentPolicy::TokenLink => {
767 self.insert_token_trivia(token, span);
768 }
769 TriviaAttachmentPolicy::Both => {
770 self.insert_containment_trivia(document, token, span);
771 self.insert_token_trivia(token, span);
772 }
773 }
774 }
775
776 fn insert_containment_trivia(&mut self, document: LinkId, token: LinkId, span: SourceSpan) {
777 self.insert_link(
778 [document, token],
779 LinkMetadata::new()
780 .with_link_type(LinkType::Trivia)
781 .with_term("containment trivia")
782 .with_span(span)
783 .with_flags(LinkFlags::extra()),
784 );
785 }
786
787 fn insert_token_trivia(&mut self, token: LinkId, span: SourceSpan) {
788 self.insert_link(
789 [token],
790 LinkMetadata::new()
791 .with_link_type(LinkType::Trivia)
792 .with_term("token trivia")
793 .with_span(span)
794 .with_flags(LinkFlags::extra()),
795 );
796 }
797
798 fn insert_dynamic_link(&mut self, references: &[LinkId], metadata: LinkMetadata) -> LinkId {
799 let id = self.allocate_id();
800 self.links.insert(
801 id,
802 Link {
803 id,
804 references: references.to_vec(),
805 metadata,
806 },
807 );
808 id
809 }
810
811 pub(crate) fn attach_embedded_regions(
812 &mut self,
813 document: LinkId,
814 text: &str,
815 language: &str,
816 policy: RegionDetectionPolicy,
817 ) {
818 for region in detect_embedded_regions(text, language, policy) {
819 let region_language = region.language().to_string();
820 let language_link = self.insert_typed_point(®ion_language, LinkType::Language, None);
821 self.insert_link(
822 [document, language_link],
823 LinkMetadata::new()
824 .with_link_type(LinkType::Region)
825 .with_named(true)
826 .with_term(format!("{region_language} region"))
827 .with_language(region_language)
828 .with_span(region.span()),
829 );
830 }
831 }
832
833 const fn allocate_id(&mut self) -> LinkId {
834 let id = LinkId(self.next_id);
835 self.next_id += 1;
836 id
837 }
838}
839
840fn link_is_in_region(link: &Link, region: Option<ByteRange>) -> bool {
841 let Some(region) = region else {
842 return true;
843 };
844 link.metadata()
845 .span()
846 .is_some_and(|span| span.byte_range().intersects(region))
847}
848
849fn end_point_for_text(text: &str) -> Point {
850 let mut row = 0;
851 let mut column = 0;
852 for character in text.chars() {
853 if character == '\n' {
854 row += 1;
855 column = 0;
856 } else {
857 column += 1;
858 }
859 }
860 Point::new(row, column)
861}