1use std::collections::BTreeMap;
2use std::fmt;
3
4use crate::configuration::{ParseConfiguration, RegionDetectionPolicy, TriviaAttachmentPolicy};
5use crate::link_flags::LinkFlags;
6use crate::mixed_regions::{detect_embedded_regions, EmbeddedRegion};
7use crate::query::LinkQuery;
8use crate::source::{ByteRange, Point, SourceSpan};
9use crate::substitution::{SubstitutionReport, SubstitutionRule};
10use crate::verification::{VerificationIssue, VerificationIssueKind, VerificationReport};
11
/// Opaque identifier of a link, wrapping a raw `u64`.
///
/// Identifiers are ordered and hashable, so they can serve as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LinkId(u64);

impl LinkId {
    /// Returns the raw numeric value wrapped by this identifier.
    #[must_use]
    pub const fn as_u64(self) -> u64 {
        let Self(raw) = self;
        raw
    }
}

impl fmt::Display for LinkId {
    /// Writes the raw numeric value in decimal.
    ///
    /// The value is rendered through a nested `{}` placeholder, so width and
    /// padding requested on the outer formatter are not applied.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(raw) = self;
        formatter.write_fmt(format_args!("{raw}"))
    }
}
29
/// Classification attached to a link through its metadata.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    /// The notion of a link itself.
    Link,
    /// One position in a link that points to another link.
    Reference,
    /// A link that connects references to other links.
    Relation,
    /// A language.
    Language,
    /// A grammar.
    Grammar,
    /// A type.
    Type,
    /// A shared-meaning concept; also the type given to plain points.
    Concept,
    /// A syntax link.
    Syntax,
    /// A field label, or a labeled parent/child relation.
    Field,
    /// A trivia attachment link.
    Trivia,
    /// A source token.
    Token,
    /// A link that stands for a whole parsed text.
    Document,
    /// A semantic link, such as a concept-to-language mapping.
    Semantic,
    /// An embedded-language region of the source.
    Region,
    /// A shareable object identity.
    Object,
}
49
/// A view over a network that selects links by their link type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NetworkProjection {
    /// Keeps every link.
    Lossless,
    /// Keeps every link except those typed as semantic.
    ConcreteSyntax,
    /// Keeps every link except those typed as token or trivia.
    AbstractSyntax,
    /// Keeps only links typed as semantic, concept, type, or language.
    Semantic,
}
62
63impl NetworkProjection {
64 #[must_use]
66 pub const fn label(self) -> &'static str {
67 match self {
68 Self::Lossless => "lossless",
69 Self::ConcreteSyntax => "concrete syntax",
70 Self::AbstractSyntax => "abstract syntax",
71 Self::Semantic => "semantic",
72 }
73 }
74
75 fn includes(self, link: &Link) -> bool {
76 match self {
77 Self::Lossless => true,
78 Self::ConcreteSyntax => link.metadata().link_type() != Some(LinkType::Semantic),
79 Self::AbstractSyntax => !matches!(
80 link.metadata().link_type(),
81 Some(LinkType::Token | LinkType::Trivia)
82 ),
83 Self::Semantic => matches!(
84 link.metadata().link_type(),
85 Some(LinkType::Semantic | LinkType::Concept | LinkType::Type | LinkType::Language)
86 ),
87 }
88 }
89}
90
91impl fmt::Display for LinkType {
92 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
93 let name = match self {
94 Self::Link => "link",
95 Self::Reference => "reference",
96 Self::Relation => "relation",
97 Self::Language => "language",
98 Self::Grammar => "grammar",
99 Self::Type => "type",
100 Self::Concept => "concept",
101 Self::Syntax => "syntax",
102 Self::Field => "field",
103 Self::Trivia => "trivia",
104 Self::Token => "token",
105 Self::Document => "document",
106 Self::Semantic => "semantic",
107 Self::Region => "region",
108 Self::Object => "object",
109 };
110 formatter.write_str(name)
111 }
112}
113
114#[derive(Clone, Debug, Default, PartialEq, Eq)]
116pub struct LinkMetadata {
117 link_type: Option<LinkType>,
118 named: bool,
119 term: Option<String>,
120 definition: Option<String>,
121 language: Option<String>,
122 span: Option<SourceSpan>,
123 flags: LinkFlags,
124}
125
126impl LinkMetadata {
127 #[must_use]
129 pub fn new() -> Self {
130 Self::default()
131 }
132
133 #[must_use]
135 pub const fn with_link_type(mut self, link_type: LinkType) -> Self {
136 self.link_type = Some(link_type);
137 self
138 }
139
140 #[must_use]
142 pub const fn with_named(mut self, named: bool) -> Self {
143 self.named = named;
144 self
145 }
146
147 #[must_use]
149 pub fn with_term(mut self, term: impl Into<String>) -> Self {
150 self.term = Some(term.into());
151 self
152 }
153
154 #[must_use]
156 pub fn with_definition(mut self, definition: impl Into<String>) -> Self {
157 self.definition = Some(definition.into());
158 self
159 }
160
161 #[must_use]
163 pub fn with_language(mut self, language: impl Into<String>) -> Self {
164 self.language = Some(language.into());
165 self
166 }
167
168 #[must_use]
170 pub const fn with_span(mut self, span: SourceSpan) -> Self {
171 self.span = Some(span);
172 self
173 }
174
175 #[must_use]
177 pub const fn with_flags(mut self, flags: LinkFlags) -> Self {
178 self.flags = flags;
179 self
180 }
181
182 #[must_use]
184 pub const fn link_type(&self) -> Option<LinkType> {
185 self.link_type
186 }
187
188 #[must_use]
190 pub const fn is_named(&self) -> bool {
191 self.named
192 }
193
194 #[must_use]
196 pub fn term(&self) -> Option<&str> {
197 self.term.as_deref()
198 }
199
200 #[must_use]
202 pub fn definition(&self) -> Option<&str> {
203 self.definition.as_deref()
204 }
205
206 #[must_use]
208 pub fn language(&self) -> Option<&str> {
209 self.language.as_deref()
210 }
211
212 #[must_use]
214 pub const fn span(&self) -> Option<SourceSpan> {
215 self.span
216 }
217
218 #[must_use]
220 pub const fn flags(&self) -> LinkFlags {
221 self.flags
222 }
223}
224
225#[derive(Clone, Debug, PartialEq, Eq)]
227pub struct Link {
228 id: LinkId,
229 references: Vec<LinkId>,
230 metadata: LinkMetadata,
231}
232
233impl Link {
234 #[must_use]
236 pub const fn id(&self) -> LinkId {
237 self.id
238 }
239
240 #[must_use]
242 pub fn references(&self) -> &[LinkId] {
243 &self.references
244 }
245
246 #[must_use]
248 pub const fn metadata(&self) -> &LinkMetadata {
249 &self.metadata
250 }
251
252 const fn metadata_mut(&mut self) -> &mut LinkMetadata {
253 &mut self.metadata
254 }
255}
256
257#[derive(Clone, Debug, Default, PartialEq, Eq)]
259pub struct LinkNetwork {
260 next_id: u64,
261 links: BTreeMap<LinkId, Link>,
262 terms: BTreeMap<String, LinkId>,
263 concept_syntax: BTreeMap<(String, String), String>,
264}
265
266impl LinkNetwork {
267 #[must_use]
269 pub const fn new() -> Self {
270 Self {
271 next_id: 1,
272 links: BTreeMap::new(),
273 terms: BTreeMap::new(),
274 concept_syntax: BTreeMap::new(),
275 }
276 }
277
278 #[must_use]
280 pub fn self_describing() -> Self {
281 let mut network = Self::new();
282 network.insert_typed_point(
283 "link",
284 LinkType::Link,
285 Some("A link is an n-tuple of references to links."),
286 );
287 network.insert_typed_point(
288 "reference",
289 LinkType::Reference,
290 Some("A reference is one position in a link that points to another link."),
291 );
292 network.insert_typed_point(
293 "relation link",
294 LinkType::Relation,
295 Some("A relation link connects references to other links and is itself a link."),
296 );
297 network.insert_typed_point(
298 "language",
299 LinkType::Language,
300 Some("A language is a set of grammar, syntax, and semantic links."),
301 );
302 network.insert_typed_point(
303 "grammar",
304 LinkType::Grammar,
305 Some("A grammar describes which syntax links fully match a language."),
306 );
307 network.insert_typed_point(
308 "type",
309 LinkType::Type,
310 Some("A type is a link that constrains or classifies other links."),
311 );
312 network.insert_typed_point(
313 "concept",
314 LinkType::Concept,
315 Some("A concept is a shared meaning link that multiple languages can reference."),
316 );
317 network.insert_typed_point(
318 "point",
319 LinkType::Concept,
320 Some("A point is represented as a self-referential link."),
321 );
322 network.insert_typed_point(
323 "field",
324 LinkType::Field,
325 Some("A field is a labeled relation link from a parent link to a child link."),
326 );
327 network.insert_typed_point(
328 "trivia",
329 LinkType::Trivia,
330 Some("Trivia is source text preserved by explicit attachment links."),
331 );
332 network.insert_typed_point(
333 "region",
334 LinkType::Region,
335 Some("A region is a source span with a selected or detected language."),
336 );
337 network.insert_typed_point(
338 "object",
339 LinkType::Object,
340 Some("An object identity is represented by a link that other links can share."),
341 );
342 network
343 }
344
345 #[must_use]
351 pub fn parse(text: &str, language: &str, configuration: ParseConfiguration) -> Self {
352 Self::parse_lossless_text(text, language, configuration)
353 }
354
355 #[must_use]
361 pub fn parse_lossless_text(
362 text: &str,
363 language: &str,
364 configuration: ParseConfiguration,
365 ) -> Self {
366 let mut network = Self::self_describing();
367 let language_link = network.insert_typed_point(language, LinkType::Language, None);
368 let document_span = SourceSpan::new(
369 ByteRange::new(0, text.len()),
370 Point::new(0, 0),
371 end_point_for_text(text),
372 );
373 let document = network.insert_link(
374 [language_link],
375 LinkMetadata::new()
376 .with_link_type(LinkType::Document)
377 .with_named(true)
378 .with_term(format!("{language} document"))
379 .with_language(language)
380 .with_span(document_span),
381 );
382
383 let mut row = 0;
384 let mut column = 0;
385 let mut open_parentheses = Vec::new();
386 for (start, character) in text.char_indices() {
387 let start_point = Point::new(row, column);
388 let end = start + character.len_utf8();
389 if character == '\n' {
390 row += 1;
391 column = 0;
392 } else {
393 column += 1;
394 }
395 let end_point = Point::new(row, column);
396 let span = SourceSpan::new(ByteRange::new(start, end), start_point, end_point);
397 let mut metadata = LinkMetadata::new()
398 .with_link_type(LinkType::Token)
399 .with_named(!character.is_whitespace())
400 .with_term(character.to_string())
401 .with_language(language)
402 .with_span(span);
403
404 if character.is_whitespace() {
405 metadata = metadata.with_flags(LinkFlags::extra());
406 }
407
408 let token = network.insert_link([document], metadata);
409 match character {
410 '(' => open_parentheses.push(token),
411 ')' if open_parentheses.pop().is_none() => {
412 network.set_flags(token, LinkFlags::error());
413 }
414 _ => {}
415 }
416 if character.is_whitespace() {
417 network.attach_trivia(
418 document,
419 token,
420 span,
421 configuration.trivia_attachment_policy(),
422 );
423 }
424 }
425
426 let missing_span = SourceSpan::new(
427 ByteRange::new(text.len(), text.len()),
428 end_point_for_text(text),
429 end_point_for_text(text),
430 );
431 for open_parenthesis in open_parentheses {
432 network.set_flags(open_parenthesis, LinkFlags::containing_error());
433 network.insert_link(
434 [document],
435 LinkMetadata::new()
436 .with_link_type(LinkType::Token)
437 .with_named(false)
438 .with_term(")")
439 .with_language(language)
440 .with_span(missing_span)
441 .with_flags(LinkFlags::missing()),
442 );
443 }
444
445 network.attach_embedded_regions(
446 document,
447 text,
448 language,
449 configuration.region_detection_policy(),
450 );
451
452 network
453 }
454
455 #[must_use]
457 pub fn len(&self) -> usize {
458 self.links.len()
459 }
460
461 #[must_use]
463 pub fn is_empty(&self) -> bool {
464 self.links.is_empty()
465 }
466
467 pub fn links(&self) -> impl Iterator<Item = &Link> {
469 self.links.values()
470 }
471
472 pub fn projected_links(&self, projection: NetworkProjection) -> impl Iterator<Item = &Link> {
474 self.links().filter(move |link| projection.includes(link))
475 }
476
477 #[must_use]
479 pub fn reconstruct_text(&self) -> String {
480 let mut tokens = self
481 .links()
482 .filter(|link| link.metadata().link_type() == Some(LinkType::Token))
483 .filter(|link| !link.metadata().flags().is_missing())
484 .filter_map(|link| {
485 Some((
486 link.metadata().span()?.byte_range().start(),
487 link.id().as_u64(),
488 link.metadata().term()?.to_string(),
489 ))
490 })
491 .collect::<Vec<_>>();
492
493 tokens.sort_by_key(|(start, id, _term)| (*start, *id));
494 tokens.into_iter().map(|(_start, _id, term)| term).collect()
495 }
496
497 #[must_use]
499 pub fn embedded_regions(&self) -> Vec<EmbeddedRegion> {
500 self.links()
501 .filter(|link| link.metadata().link_type() == Some(LinkType::Region))
502 .filter_map(|link| {
503 Some(EmbeddedRegion::new(
504 link.metadata().language()?.to_string(),
505 link.metadata().span()?,
506 ))
507 })
508 .collect()
509 }
510
511 #[must_use]
513 pub fn query_links(&self, query: &LinkQuery) -> Vec<&Link> {
514 self.links().filter(|link| query.matches(link)).collect()
515 }
516
517 pub fn insert_point(&mut self, term: &str) -> LinkId {
519 self.insert_typed_point(term, LinkType::Concept, None)
520 }
521
522 pub fn insert_object(&mut self, term: &str) -> LinkId {
524 self.insert_typed_point(term, LinkType::Object, None)
525 }
526
527 pub fn insert_relation<const N: usize>(
529 &mut self,
530 references: [LinkId; N],
531 link_type: LinkType,
532 span: SourceSpan,
533 ) -> LinkId {
534 self.insert_link(
535 references,
536 LinkMetadata::new()
537 .with_link_type(link_type)
538 .with_span(span),
539 )
540 }
541
542 pub fn insert_field(&mut self, parent: LinkId, label: &str, child: LinkId) -> LinkId {
544 let label_link = self.insert_typed_point(
545 label,
546 LinkType::Field,
547 Some("A field label names a relation between links."),
548 );
549 self.insert_link(
550 [parent, label_link, child],
551 LinkMetadata::new().with_link_type(LinkType::Field),
552 )
553 }
554
555 pub fn insert_link<const N: usize>(
557 &mut self,
558 references: [LinkId; N],
559 metadata: LinkMetadata,
560 ) -> LinkId {
561 let id = self.allocate_id();
562 self.links.insert(
563 id,
564 Link {
565 id,
566 references: references.to_vec(),
567 metadata,
568 },
569 );
570 id
571 }
572
573 pub fn insert_concept_mapping(
575 &mut self,
576 concept: &str,
577 language: &str,
578 syntax: &str,
579 ) -> LinkId {
580 let concept_link = self.insert_typed_point(
581 concept,
582 LinkType::Concept,
583 Some("A concept mapping connects shared meaning to language syntax."),
584 );
585 let language_link = self.insert_typed_point(language, LinkType::Language, None);
586 let mapping = self.insert_link(
587 [concept_link, language_link],
588 LinkMetadata::new()
589 .with_link_type(LinkType::Semantic)
590 .with_named(true)
591 .with_term(syntax)
592 .with_language(language),
593 );
594 self.concept_syntax.insert(
595 (concept.to_string(), language.to_string()),
596 syntax.to_string(),
597 );
598 mapping
599 }
600
601 #[must_use]
603 pub fn reconstruct_concept(&self, concept: &str, language: &str) -> Option<&str> {
604 self.concept_syntax
605 .get(&(concept.to_string(), language.to_string()))
606 .map(String::as_str)
607 }
608
609 pub fn apply_substitution(&mut self, rule: &SubstitutionRule) -> SubstitutionReport {
611 let mut report = SubstitutionReport::default();
612
613 if rule.pattern().is_empty() {
614 if !rule.replacement().is_empty() {
615 let created = self.insert_dynamic_link(
616 rule.replacement(),
617 LinkMetadata::new().with_link_type(LinkType::Relation),
618 );
619 report.created.push(created);
620 }
621 return report;
622 }
623
624 let matched = self
625 .links()
626 .filter(|link| link.references() == rule.pattern())
627 .map(Link::id)
628 .collect::<Vec<_>>();
629
630 if rule.replacement().is_empty() {
631 for id in matched {
632 if self.links.remove(&id).is_some() {
633 report.deleted.push(id);
634 }
635 }
636 return report;
637 }
638
639 for id in matched {
640 if let Some(link) = self.links.get_mut(&id) {
641 link.references = rule.replacement().to_vec();
642 report.updated.push(id);
643 }
644 }
645
646 report
647 }
648
649 #[must_use]
651 pub fn link(&self, id: LinkId) -> Option<&Link> {
652 self.links.get(&id)
653 }
654
655 #[must_use]
657 pub fn find_term(&self, term: &str) -> Option<LinkId> {
658 self.terms.get(term).copied()
659 }
660
661 #[must_use]
663 pub fn definition_for(&self, id: LinkId) -> Option<&str> {
664 self.link(id).and_then(|link| link.metadata().definition())
665 }
666
667 pub fn set_span(&mut self, id: LinkId, span: SourceSpan) -> bool {
669 let Some(link) = self.links.get_mut(&id) else {
670 return false;
671 };
672 link.metadata_mut().span = Some(span);
673 true
674 }
675
676 pub fn set_flags(&mut self, id: LinkId, flags: LinkFlags) -> bool {
678 let Some(link) = self.links.get_mut(&id) else {
679 return false;
680 };
681 link.metadata_mut().flags = flags;
682 true
683 }
684
685 #[must_use]
687 pub fn verify_full_match(&self, region: Option<ByteRange>) -> VerificationReport {
688 let issues = self
689 .links
690 .values()
691 .filter(|link| link_is_in_region(link, region))
692 .filter_map(|link| {
693 let flags = link.metadata().flags();
694 let kind = if flags.is_error() {
695 VerificationIssueKind::ErrorLink
696 } else if flags.is_missing() {
697 VerificationIssueKind::MissingLink
698 } else if flags.has_error() {
699 VerificationIssueKind::HasErrorLink
700 } else {
701 return None;
702 };
703
704 Some(VerificationIssue::new(
705 link.id(),
706 kind,
707 link.metadata().span(),
708 ))
709 })
710 .collect();
711 VerificationReport::new(issues)
712 }
713
714 fn insert_typed_point(
715 &mut self,
716 term: &str,
717 link_type: LinkType,
718 definition: Option<&str>,
719 ) -> LinkId {
720 if let Some(id) = self.terms.get(term).copied() {
721 if let Some(definition) = definition {
722 if let Some(link) = self.links.get_mut(&id) {
723 link.metadata_mut().definition = Some(definition.to_string());
724 }
725 }
726 return id;
727 }
728
729 let id = self.allocate_id();
730 let mut metadata = LinkMetadata::new()
731 .with_link_type(link_type)
732 .with_named(true)
733 .with_term(term);
734 if let Some(definition) = definition {
735 metadata = metadata.with_definition(definition);
736 }
737 self.links.insert(
738 id,
739 Link {
740 id,
741 references: vec![id],
742 metadata,
743 },
744 );
745 self.terms.insert(term.to_string(), id);
746 id
747 }
748
749 fn attach_trivia(
750 &mut self,
751 document: LinkId,
752 token: LinkId,
753 span: SourceSpan,
754 policy: TriviaAttachmentPolicy,
755 ) {
756 match policy {
757 TriviaAttachmentPolicy::ContainmentLink => {
758 self.insert_containment_trivia(document, token, span);
759 }
760 TriviaAttachmentPolicy::TokenLink => {
761 self.insert_token_trivia(token, span);
762 }
763 TriviaAttachmentPolicy::Both => {
764 self.insert_containment_trivia(document, token, span);
765 self.insert_token_trivia(token, span);
766 }
767 }
768 }
769
770 fn insert_containment_trivia(&mut self, document: LinkId, token: LinkId, span: SourceSpan) {
771 self.insert_link(
772 [document, token],
773 LinkMetadata::new()
774 .with_link_type(LinkType::Trivia)
775 .with_term("containment trivia")
776 .with_span(span)
777 .with_flags(LinkFlags::extra()),
778 );
779 }
780
781 fn insert_token_trivia(&mut self, token: LinkId, span: SourceSpan) {
782 self.insert_link(
783 [token],
784 LinkMetadata::new()
785 .with_link_type(LinkType::Trivia)
786 .with_term("token trivia")
787 .with_span(span)
788 .with_flags(LinkFlags::extra()),
789 );
790 }
791
792 fn insert_dynamic_link(&mut self, references: &[LinkId], metadata: LinkMetadata) -> LinkId {
793 let id = self.allocate_id();
794 self.links.insert(
795 id,
796 Link {
797 id,
798 references: references.to_vec(),
799 metadata,
800 },
801 );
802 id
803 }
804
805 fn attach_embedded_regions(
806 &mut self,
807 document: LinkId,
808 text: &str,
809 language: &str,
810 policy: RegionDetectionPolicy,
811 ) {
812 for region in detect_embedded_regions(text, language, policy) {
813 let region_language = region.language().to_string();
814 let language_link = self.insert_typed_point(®ion_language, LinkType::Language, None);
815 self.insert_link(
816 [document, language_link],
817 LinkMetadata::new()
818 .with_link_type(LinkType::Region)
819 .with_named(true)
820 .with_term(format!("{region_language} region"))
821 .with_language(region_language)
822 .with_span(region.span()),
823 );
824 }
825 }
826
827 const fn allocate_id(&mut self) -> LinkId {
828 let id = LinkId(self.next_id);
829 self.next_id += 1;
830 id
831 }
832}
833
834fn link_is_in_region(link: &Link, region: Option<ByteRange>) -> bool {
835 let Some(region) = region else {
836 return true;
837 };
838 link.metadata()
839 .span()
840 .is_some_and(|span| span.byte_range().intersects(region))
841}
842
843fn end_point_for_text(text: &str) -> Point {
844 let mut row = 0;
845 let mut column = 0;
846 for character in text.chars() {
847 if character == '\n' {
848 row += 1;
849 column = 0;
850 } else {
851 column += 1;
852 }
853 }
854 Point::new(row, column)
855}