// tstring_syntax/lib.rs

use std::collections::BTreeMap;

use num_bigint::BigInt;
4
/// A location inside a tokenized template.
///
/// A position is addressed by the token it falls in plus an offset inside
/// that token. For static text produced by `TemplateInput::tokenize` the
/// offset counts `char`s, not bytes.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SourcePosition {
    /// Index of the token this position belongs to.
    pub token_index: usize,
    /// Offset within that token.
    pub offset: usize,
}
10
11#[derive(Clone, Debug, PartialEq, Eq)]
12pub struct SourceSpan {
13    pub start: SourcePosition,
14    pub end: SourcePosition,
15}
16
17impl SourceSpan {
18    #[must_use]
19    pub fn point(token_index: usize, offset: usize) -> Self {
20        let position = SourcePosition {
21            token_index,
22            offset,
23        };
24        Self {
25            start: position.clone(),
26            end: position,
27        }
28    }
29
30    #[must_use]
31    pub fn between(start: SourcePosition, end: SourcePosition) -> Self {
32        Self { start, end }
33    }
34
35    #[must_use]
36    pub fn extend(&self, end: SourcePosition) -> Self {
37        Self {
38            start: self.start.clone(),
39            end,
40        }
41    }
42
43    #[must_use]
44    pub fn merge(&self, other: &Self) -> Self {
45        Self {
46            start: self.start.clone(),
47            end: other.end.clone(),
48        }
49    }
50}
51
/// How serious a [`Diagnostic`] is.
///
/// `Diagnostic::error` always uses [`DiagnosticSeverity::Error`]; a
/// `Warning` diagnostic has to be built by filling in the struct fields
/// directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// The diagnostic reports an error.
    Error,
    /// The diagnostic reports a warning.
    Warning,
}
57
58#[derive(Clone, Debug, PartialEq, Eq)]
59pub struct Diagnostic {
60    pub code: String,
61    pub message: String,
62    pub severity: DiagnosticSeverity,
63    pub span: Option<SourceSpan>,
64    pub metadata: BTreeMap<String, String>,
65}
66
67impl Diagnostic {
68    #[must_use]
69    pub fn error(
70        code: impl Into<String>,
71        message: impl Into<String>,
72        span: Option<SourceSpan>,
73    ) -> Self {
74        Self {
75            code: code.into(),
76            message: message.into(),
77            severity: DiagnosticSeverity::Error,
78            span,
79            metadata: BTreeMap::new(),
80        }
81    }
82}
83
/// Broad category of a `BackendError`.
///
/// Each variant is set by the matching pair of `BackendError` constructors
/// (`parse`/`parse_at`, `semantic`/`semantic_at`,
/// `unrepresentable`/`unrepresentable_at`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
    /// Produced by `BackendError::parse` and `BackendError::parse_at`.
    Parse,
    /// Produced by `BackendError::semantic` and `BackendError::semantic_at`.
    Semantic,
    /// Produced by `BackendError::unrepresentable` and
    /// `BackendError::unrepresentable_at`.
    Unrepresentable,
}
90
91#[derive(Clone, Debug, PartialEq, Eq)]
92pub struct BackendError {
93    pub kind: ErrorKind,
94    pub message: String,
95    pub diagnostics: Vec<Diagnostic>,
96}
97
98impl BackendError {
99    #[must_use]
100    pub fn parse(message: impl Into<String>) -> Self {
101        Self::new(ErrorKind::Parse, "tstring.parse", message, None)
102    }
103
104    #[must_use]
105    pub fn parse_at(
106        code: impl Into<String>,
107        message: impl Into<String>,
108        span: impl Into<Option<SourceSpan>>,
109    ) -> Self {
110        Self::new(ErrorKind::Parse, code, message, span.into())
111    }
112
113    #[must_use]
114    pub fn semantic(message: impl Into<String>) -> Self {
115        Self::new(ErrorKind::Semantic, "tstring.semantic", message, None)
116    }
117
118    #[must_use]
119    pub fn semantic_at(
120        code: impl Into<String>,
121        message: impl Into<String>,
122        span: impl Into<Option<SourceSpan>>,
123    ) -> Self {
124        Self::new(ErrorKind::Semantic, code, message, span.into())
125    }
126
127    #[must_use]
128    pub fn unrepresentable(message: impl Into<String>) -> Self {
129        Self::new(
130            ErrorKind::Unrepresentable,
131            "tstring.unrepresentable",
132            message,
133            None,
134        )
135    }
136
137    #[must_use]
138    pub fn unrepresentable_at(
139        code: impl Into<String>,
140        message: impl Into<String>,
141        span: impl Into<Option<SourceSpan>>,
142    ) -> Self {
143        Self::new(ErrorKind::Unrepresentable, code, message, span.into())
144    }
145
146    #[must_use]
147    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
148        if let Some(primary) = self.diagnostics.first_mut() {
149            primary.metadata.insert(key.into(), value.into());
150        }
151        self
152    }
153
154    fn new(
155        kind: ErrorKind,
156        code: impl Into<String>,
157        message: impl Into<String>,
158        span: Option<SourceSpan>,
159    ) -> Self {
160        let message = message.into();
161        Self {
162            kind,
163            diagnostics: vec![Diagnostic::error(code, message.clone(), span)],
164            message,
165        }
166    }
167}
168
169impl std::fmt::Display for BackendError {
170    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        formatter.write_str(&self.message)
172    }
173}
174
175impl std::error::Error for BackendError {}
176
177pub type BackendResult<T> = Result<T, BackendError>;
178
179#[derive(Clone, Debug, PartialEq)]
180pub struct NormalizedStream {
181    pub documents: Vec<NormalizedDocument>,
182}
183
184impl NormalizedStream {
185    #[must_use]
186    pub fn new(documents: Vec<NormalizedDocument>) -> Self {
187        Self { documents }
188    }
189}
190
191#[derive(Clone, Debug, PartialEq)]
192pub enum NormalizedDocument {
193    Empty,
194    Value(NormalizedValue),
195}
196
197#[derive(Clone, Debug, PartialEq)]
198pub enum NormalizedValue {
199    Null,
200    Bool(bool),
201    Integer(BigInt),
202    Float(NormalizedFloat),
203    String(String),
204    Temporal(NormalizedTemporal),
205    Sequence(Vec<NormalizedValue>),
206    Mapping(Vec<NormalizedEntry>),
207    Set(Vec<NormalizedKey>),
208}
209
210#[derive(Clone, Debug, PartialEq)]
211pub struct NormalizedEntry {
212    pub key: NormalizedKey,
213    pub value: NormalizedValue,
214}
215
216#[derive(Clone, Debug, PartialEq)]
217pub enum NormalizedKey {
218    Null,
219    Bool(bool),
220    Integer(BigInt),
221    Float(NormalizedFloat),
222    String(String),
223    Temporal(NormalizedTemporal),
224    Sequence(Vec<NormalizedKey>),
225    Mapping(Vec<NormalizedKeyEntry>),
226}
227
228#[derive(Clone, Debug, PartialEq)]
229pub struct NormalizedKeyEntry {
230    pub key: NormalizedKey,
231    pub value: NormalizedKey,
232}
233
/// A floating-point value with the non-finite cases split out into their own
/// variants.
///
/// Because equality is derived, `NormalizedFloat::NaN == NormalizedFloat::NaN`
/// is `true`, unlike comparing two `f64` NaNs. A NaN stored inside `Finite`
/// would still compare unequal to itself.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum NormalizedFloat {
    /// An ordinary number.
    Finite(f64),
    /// Positive infinity.
    PosInf,
    /// Negative infinity.
    NegInf,
    /// Not a number.
    NaN,
}
241
242#[derive(Clone, Debug, PartialEq, Eq)]
243pub enum NormalizedTemporal {
244    OffsetDateTime(NormalizedOffsetDateTime),
245    LocalDateTime(NormalizedLocalDateTime),
246    LocalDate(NormalizedDate),
247    LocalTime(NormalizedTime),
248}
249
250#[derive(Clone, Debug, PartialEq, Eq)]
251pub struct NormalizedOffsetDateTime {
252    pub date: NormalizedDate,
253    pub time: NormalizedTime,
254    pub offset_minutes: i16,
255}
256
257#[derive(Clone, Debug, PartialEq, Eq)]
258pub struct NormalizedLocalDateTime {
259    pub date: NormalizedDate,
260    pub time: NormalizedTime,
261}
262
/// A calendar date as plain numeric fields.
///
/// The struct performs no validation; any combination of field values can be
/// stored.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct NormalizedDate {
    /// Year (signed).
    pub year: i32,
    /// Month number.
    pub month: u8,
    /// Day number.
    pub day: u8,
}
269
/// A time of day as plain numeric fields.
///
/// The struct performs no validation; any combination of field values can be
/// stored.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct NormalizedTime {
    /// Hour component.
    pub hour: u8,
    /// Minute component.
    pub minute: u8,
    /// Second component.
    pub second: u8,
    /// Sub-second component in nanoseconds.
    pub nanosecond: u32,
}
277
278impl NormalizedFloat {
279    #[must_use]
280    pub fn finite(value: f64) -> Self {
281        debug_assert!(value.is_finite());
282        Self::Finite(value)
283    }
284}
285
/// One interpolation slot of a template, e.g. the `{value!r:>5}` part.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TemplateInterpolation {
    /// Text of the interpolated expression; may be empty.
    pub expression: String,
    /// Conversion requested for the value (such as `"r"`), if any.
    pub conversion: Option<String>,
    /// Format specification (such as `">5"`); empty when none was given.
    pub format_spec: String,
    /// Identifier used to look the interpolation up again through
    /// `TemplateInput::interpolation`.
    pub interpolation_index: usize,
    /// Original source text of the slot, when it is available.
    pub raw_source: Option<String>,
}

impl TemplateInterpolation {
    /// Returns the expression text for use in messages, or the placeholder
    /// `"slot"` when the expression is empty.
    #[must_use]
    pub fn expression_label(&self) -> &str {
        match self.expression.as_str() {
            "" => "slot",
            expression => expression,
        }
    }
}
305
306#[derive(Clone, Debug, PartialEq, Eq)]
307pub struct StaticTextToken {
308    pub text: String,
309    pub token_index: usize,
310    pub span: SourceSpan,
311}
312
313#[derive(Clone, Debug, PartialEq, Eq)]
314pub struct InterpolationToken {
315    pub interpolation: TemplateInterpolation,
316    pub interpolation_index: usize,
317    pub token_index: usize,
318    pub span: SourceSpan,
319}
320
321#[derive(Clone, Debug, PartialEq, Eq)]
322pub enum TemplateToken {
323    StaticText(StaticTextToken),
324    Interpolation(InterpolationToken),
325}
326
327#[derive(Clone, Debug, PartialEq, Eq)]
328pub enum StreamItem {
329    Char {
330        ch: char,
331        span: SourceSpan,
332    },
333    Interpolation {
334        interpolation: TemplateInterpolation,
335        interpolation_index: usize,
336        span: SourceSpan,
337    },
338    Eof {
339        span: SourceSpan,
340    },
341}
342
343impl StreamItem {
344    #[must_use]
345    pub fn kind(&self) -> &'static str {
346        match self {
347            Self::Char { .. } => "char",
348            Self::Interpolation { .. } => "interpolation",
349            Self::Eof { .. } => "eof",
350        }
351    }
352
353    #[must_use]
354    pub fn char(&self) -> Option<char> {
355        match self {
356            Self::Char { ch, .. } => Some(*ch),
357            _ => None,
358        }
359    }
360
361    #[must_use]
362    pub fn interpolation(&self) -> Option<&TemplateInterpolation> {
363        match self {
364            Self::Interpolation { interpolation, .. } => Some(interpolation),
365            _ => None,
366        }
367    }
368
369    #[must_use]
370    pub fn interpolation_index(&self) -> Option<usize> {
371        match self {
372            Self::Interpolation {
373                interpolation_index,
374                ..
375            } => Some(*interpolation_index),
376            _ => None,
377        }
378    }
379
380    #[must_use]
381    pub fn span(&self) -> &SourceSpan {
382        match self {
383            Self::Char { span, .. } | Self::Interpolation { span, .. } | Self::Eof { span } => span,
384        }
385    }
386}
387
388#[derive(Clone, Debug, PartialEq, Eq)]
389pub enum TemplateSegment {
390    StaticText(String),
391    Interpolation(TemplateInterpolation),
392}
393
394#[derive(Clone, Debug, PartialEq, Eq)]
395pub struct TemplateInput {
396    pub segments: Vec<TemplateSegment>,
397}
398
399impl TemplateInput {
400    #[must_use]
401    pub fn from_segments(segments: Vec<TemplateSegment>) -> Self {
402        Self { segments }
403    }
404
405    #[must_use]
406    pub fn from_parts(strings: Vec<String>, interpolations: Vec<TemplateInterpolation>) -> Self {
407        debug_assert_eq!(strings.len(), interpolations.len() + 1);
408
409        let mut segments = Vec::with_capacity(strings.len() + interpolations.len());
410        for (interpolation_index, interpolation) in interpolations.into_iter().enumerate() {
411            let text = strings[interpolation_index].clone();
412            if !text.is_empty() {
413                segments.push(TemplateSegment::StaticText(text));
414            }
415            segments.push(TemplateSegment::Interpolation(interpolation));
416        }
417
418        let tail = strings.last().cloned().unwrap_or_default();
419        if !tail.is_empty() || segments.is_empty() {
420            segments.push(TemplateSegment::StaticText(tail));
421        }
422
423        Self { segments }
424    }
425
426    #[must_use]
427    pub fn tokenize(&self) -> Vec<TemplateToken> {
428        let mut tokens = Vec::new();
429
430        for (token_index, segment) in self.segments.iter().enumerate() {
431            match segment {
432                TemplateSegment::StaticText(text) => {
433                    let end = text.chars().count();
434                    tokens.push(TemplateToken::StaticText(StaticTextToken {
435                        text: text.clone(),
436                        token_index,
437                        span: SourceSpan::between(
438                            SourcePosition {
439                                token_index,
440                                offset: 0,
441                            },
442                            SourcePosition {
443                                token_index,
444                                offset: end,
445                            },
446                        ),
447                    }));
448                }
449                TemplateSegment::Interpolation(interpolation) => {
450                    tokens.push(TemplateToken::Interpolation(InterpolationToken {
451                        interpolation: interpolation.clone(),
452                        interpolation_index: interpolation.interpolation_index,
453                        token_index,
454                        span: SourceSpan::point(token_index, 0),
455                    }));
456                }
457            }
458        }
459
460        tokens
461    }
462
463    #[must_use]
464    pub fn flatten(&self) -> Vec<StreamItem> {
465        let mut items = Vec::new();
466
467        for token in self.tokenize() {
468            match token {
469                TemplateToken::StaticText(token) => {
470                    for (offset, ch) in token.text.chars().enumerate() {
471                        items.push(StreamItem::Char {
472                            ch,
473                            span: SourceSpan::between(
474                                SourcePosition {
475                                    token_index: token.token_index,
476                                    offset,
477                                },
478                                SourcePosition {
479                                    token_index: token.token_index,
480                                    offset: offset + 1,
481                                },
482                            ),
483                        });
484                    }
485                }
486                TemplateToken::Interpolation(token) => {
487                    items.push(StreamItem::Interpolation {
488                        interpolation: token.interpolation,
489                        interpolation_index: token.interpolation_index,
490                        span: token.span,
491                    });
492                }
493            }
494        }
495
496        let eof_span = items
497            .last()
498            .map_or_else(|| SourceSpan::point(0, 0), |item| item.span().clone());
499        items.push(StreamItem::Eof { span: eof_span });
500        items
501    }
502
503    #[must_use]
504    pub fn interpolation(&self, interpolation_index: usize) -> Option<&TemplateInterpolation> {
505        self.segments.iter().find_map(|segment| match segment {
506            TemplateSegment::Interpolation(interpolation)
507                if interpolation.interpolation_index == interpolation_index =>
508            {
509                Some(interpolation)
510            }
511            _ => None,
512        })
513    }
514
515    #[must_use]
516    pub fn interpolation_raw_source(&self, interpolation_index: usize) -> Option<&str> {
517        self.interpolation(interpolation_index)
518            .and_then(|interpolation| interpolation.raw_source.as_deref())
519    }
520}
521
/// Unit tests for spans, diagnostics and template tokenization.
#[cfg(test)]
mod tests {
    use super::{
        BackendError, Diagnostic, DiagnosticSeverity, ErrorKind, SourcePosition, SourceSpan,
        StreamItem, TemplateInput, TemplateInterpolation, TemplateSegment, TemplateToken,
    };

    #[test]
    fn span_helpers_compose() {
        let base = SourceSpan::between(
            SourcePosition {
                token_index: 0,
                offset: 0,
            },
            SourcePosition {
                token_index: 0,
                offset: 3,
            },
        );
        let extended = base.extend(SourcePosition {
            token_index: 0,
            offset: 5,
        });
        let merged = base.merge(&SourceSpan::point(2, 0));
        assert_eq!(extended.end.offset, 5);
        assert_eq!(merged.end.token_index, 2);
    }

    #[test]
    fn tokenize_and_flatten_templates_preserve_structure() {
        let template = TemplateInput::from_segments(vec![
            TemplateSegment::StaticText("{\"name\": ".to_owned()),
            TemplateSegment::Interpolation(TemplateInterpolation {
                expression: "value".to_owned(),
                conversion: None,
                format_spec: String::new(),
                interpolation_index: 0,
                raw_source: Some("{value}".to_owned()),
            }),
            TemplateSegment::StaticText("}".to_owned()),
        ]);

        let tokens = template.tokenize();
        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0], TemplateToken::StaticText(_)));
        assert!(matches!(tokens[1], TemplateToken::Interpolation(_)));
        assert!(matches!(tokens[2], TemplateToken::StaticText(_)));

        let items = template.flatten();
        assert_eq!(
            items
                .iter()
                .take(5)
                .map(StreamItem::kind)
                .collect::<Vec<_>>(),
            vec!["char", "char", "char", "char", "char"]
        );
        // 9 leading chars, the interpolation, the closing brace, then EOF.
        assert_eq!(items.len(), 12);
        assert_eq!(items[9].kind(), "interpolation");
        assert_eq!(items[10].char(), Some('}'));
        assert_eq!(items.last().map(StreamItem::kind), Some("eof"));
    }

    #[test]
    fn from_parts_preserves_interpolation_metadata() {
        let extracted = TemplateInput::from_parts(
            vec!["hello ".to_owned(), String::new()],
            vec![TemplateInterpolation {
                expression: "value".to_owned(),
                conversion: Some("r".to_owned()),
                format_spec: ">5".to_owned(),
                interpolation_index: 0,
                raw_source: Some("{value!r:>5}".to_owned()),
            }],
        );

        assert_eq!(extracted.segments.len(), 2);
        let TemplateSegment::Interpolation(interpolation) = &extracted.segments[1] else {
            panic!("expected interpolation segment");
        };
        assert_eq!(interpolation.expression, "value");
        assert_eq!(interpolation.conversion.as_deref(), Some("r"));
        assert_eq!(interpolation.format_spec, ">5");
        assert_eq!(interpolation.interpolation_index, 0);
        assert_eq!(interpolation.expression_label(), "value");
    }

    #[test]
    fn interpolation_lookup_preserves_raw_source() {
        let template = TemplateInput::from_parts(
            vec!["hello ".to_owned(), String::new()],
            vec![TemplateInterpolation {
                expression: "value".to_owned(),
                conversion: Some("r".to_owned()),
                format_spec: ">5".to_owned(),
                interpolation_index: 0,
                raw_source: Some("{value!r:>5}".to_owned()),
            }],
        );

        let interpolation = template.interpolation(0).expect("expected interpolation");
        assert_eq!(interpolation.expression, "value");
        assert_eq!(template.interpolation_raw_source(0), Some("{value!r:>5}"));
        assert_eq!(template.interpolation_raw_source(1), None);
    }

    #[test]
    fn diagnostics_capture_code_and_span() {
        let span = SourceSpan::point(3, 2);
        let diagnostic = Diagnostic::error("json.parse", "unexpected token", Some(span.clone()));
        assert_eq!(diagnostic.code, "json.parse");
        assert_eq!(diagnostic.severity, DiagnosticSeverity::Error);
        assert_eq!(diagnostic.span, Some(span));
        let error = BackendError::parse_at(
            "json.parse",
            "unexpected token",
            Some(SourceSpan::point(1, 0)),
        );
        assert_eq!(error.kind, ErrorKind::Parse);
        assert_eq!(error.diagnostics.len(), 1);
        assert_eq!(error.diagnostics[0].code, "json.parse");
        assert_eq!(error.to_string(), "unexpected token");
    }
}