plotnik-compiler 0.3.2

Compiler for Plotnik query language (parser, analyzer, bytecode emitter)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
use rowan::TextRange;

use super::{SourceId, Span};

/// Diagnostic kinds ordered by priority (highest priority first).
///
/// When two diagnostics have overlapping spans, the higher-priority one
/// suppresses the lower-priority one. This prevents cascading error noise.
///
/// The declaration order is load-bearing: the derived `PartialOrd`/`Ord`
/// compare variants by their position in this list, and
/// [`DiagnosticKind::suppresses`] relies on that comparison. Add new variants
/// inside the group that matches their priority instead of appending them.
///
/// Priority rationale:
/// - Unclosed delimiters cause massive cascading errors downstream
/// - Expected token errors are root causes the user should fix first
/// - Invalid syntax usage is a specific mistake at a location
/// - Naming validation errors are convention violations
/// - Semantic errors assume valid syntax
/// - Structural observations are often consequences of earlier errors
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DiagnosticKind {
    // Unclosed delimiters - these cause cascading errors throughout the rest of the file
    UnclosedTree,
    UnclosedSequence,
    UnclosedAlternation,
    UnclosedRegex,

    // Expected-token errors - the user omitted something required (root causes)
    ExpectedExpression,
    ExpectedTypeName,
    ExpectedCaptureName,
    ExpectedFieldName,
    ExpectedSubtype,
    ExpectedPredicateValue,

    // Invalid syntax - the user wrote something that doesn't belong
    EmptyTree,
    EmptyAnonymousNode,
    EmptySequence,
    EmptyAlternation,
    BareIdentifier,
    InvalidSeparator,
    AnchorInAlternation,
    InvalidFieldEquals,
    InvalidSupertypeSyntax,
    InvalidTypeAnnotationSyntax,
    ErrorTakesNoArguments,
    RefCannotHaveChildren,
    ErrorMissingOutsideParens,
    UnsupportedPredicate,
    UnexpectedToken,
    CaptureWithoutTarget,
    LowercaseBranchLabel,

    // Naming convention violations - fixable with suggestions
    CaptureNameHasDots,
    CaptureNameHasHyphens,
    CaptureNameUppercase,
    DefNameLowercase,
    DefNameHasSeparators,
    BranchLabelHasSeparators,
    FieldNameHasDots,
    FieldNameHasHyphens,
    FieldNameUppercase,
    TypeNameInvalidChars,
    TreeSitterSequenceSyntax,
    NegationSyntaxDeprecated,

    // Semantic errors - valid syntax, invalid semantics
    DuplicateDefinition,
    UndefinedReference,
    MixedAltBranches,
    RecursionNoEscape,
    DirectRecursion,
    FieldSequenceValue,
    AnchorWithoutContext,

    // Type inference errors
    IncompatibleTypes,
    MultiCaptureQuantifierNoName,
    UnusedBranchLabels,
    StrictDimensionalityViolation,
    MultiElementScalarCapture,
    UncapturedOutputWithCaptures,
    AmbiguousUncapturedOutputs,
    DuplicateCaptureInScope,
    IncompatibleCaptureTypes,
    IncompatibleStructShapes,

    // Predicate validation
    PredicateOnNonLeaf,
    EmptyRegex,
    RegexBackreference,
    RegexLookaround,
    RegexNamedCapture,
    RegexSyntaxError,

    // Link pass - grammar validation
    UnknownNodeType,
    UnknownField,
    FieldNotOnNodeType,
    InvalidFieldChildType,
    InvalidChildType,

    // Lowest priority - often consequences of earlier errors
    UnnamedDef,
}

impl DiagnosticKind {
    /// Default severity for this kind. Can be overridden by policy.
    ///
    /// Only `UnusedBranchLabels`, `TreeSitterSequenceSyntax` and
    /// `NegationSyntaxDeprecated` default to a warning; every other kind is
    /// an error.
    pub fn default_severity(&self) -> Severity {
        match self {
            Self::UnusedBranchLabels
            | Self::TreeSitterSequenceSyntax
            | Self::NegationSyntaxDeprecated => Severity::Warning,
            _ => Severity::Error,
        }
    }

    /// Whether this kind suppresses `other` when spans overlap.
    ///
    /// Uses enum discriminant ordering: lower position = higher priority.
    /// A higher-priority diagnostic suppresses lower-priority ones in the same span.
    /// The comparison is strict, so a kind never suppresses itself.
    pub fn suppresses(&self, other: &DiagnosticKind) -> bool {
        self < other
    }

    /// Structural errors are Unclosed* - they cause cascading errors but
    /// should be suppressed by root-cause errors at the same position.
    pub fn is_structural_error(&self) -> bool {
        matches!(
            self,
            Self::UnclosedTree
                | Self::UnclosedSequence
                | Self::UnclosedAlternation
                | Self::UnclosedRegex
        )
    }

    /// Root cause errors - user omitted something required.
    /// These suppress structural errors at the same position.
    pub fn is_root_cause_error(&self) -> bool {
        matches!(
            self,
            Self::ExpectedExpression
                | Self::ExpectedTypeName
                | Self::ExpectedCaptureName
                | Self::ExpectedFieldName
                | Self::ExpectedSubtype
                | Self::ExpectedPredicateValue
        )
    }

    /// Consequence errors - often caused by earlier parse errors.
    /// These get suppressed when any root-cause or structural error exists.
    pub fn is_consequence_error(&self) -> bool {
        matches!(self, Self::UnnamedDef)
    }

    /// Default hint for this kind, automatically included in diagnostics.
    /// Call sites can add additional hints for context-specific information.
    ///
    /// Returns `None` for kinds that have no built-in hint.
    pub fn default_hint(&self) -> Option<&'static str> {
        match self {
            Self::ExpectedSubtype => Some("e.g., `expression/binary_expression`"),
            Self::ExpectedTypeName => Some("e.g., `::MyType` or `::string`"),
            Self::ExpectedFieldName => Some("e.g., `-value`"),
            Self::EmptyTree => Some("use `(_)` to match any named node, or `_` for any node"),
            Self::EmptyAnonymousNode => Some("use a valid anonymous node or remove it"),
            Self::EmptySequence => Some("sequences must contain at least one expression"),
            Self::EmptyAlternation => Some("alternations must contain at least one branch"),
            Self::TreeSitterSequenceSyntax => Some("use `{...}` for sequences"),
            Self::NegationSyntaxDeprecated => Some("use `-field` instead of `!field`"),
            Self::MixedAltBranches => {
                Some("use all labels for a tagged union, or none for a merged struct")
            }
            Self::RecursionNoEscape => {
                Some("add a non-recursive branch to terminate: `[Base: ... Rec: (Self)]`")
            }
            Self::DirectRecursion => {
                Some("recursive references must consume input before recursing")
            }
            Self::AnchorWithoutContext => Some("wrap in a named node: `(parent . (child))`"),
            Self::AnchorInAlternation => Some("use `[{(a) . (b)} (c)]` to anchor within a branch"),
            Self::UncapturedOutputWithCaptures => Some("add `@name` to capture the output"),
            Self::AmbiguousUncapturedOutputs => {
                Some("capture each expression explicitly: `(X) @x (Y) @y`")
            }
            Self::MultiElementScalarCapture => {
                Some("add internal captures: `{(a) @a (b) @b}* @items`")
            }
            _ => None,
        }
    }

    /// Base message for this diagnostic kind, used when no custom message is provided.
    ///
    /// The match is intentionally exhaustive (no wildcard arm) so that adding
    /// a variant without a message is a compile error.
    pub fn fallback_message(&self) -> &'static str {
        match self {
            // Unclosed delimiters
            Self::UnclosedTree => "missing closing `)`",
            Self::UnclosedSequence => "missing closing `}`",
            Self::UnclosedAlternation => "missing closing `]`",
            Self::UnclosedRegex => "missing closing `/` for regex",

            // Expected token errors
            Self::ExpectedExpression => "expected an expression",
            Self::ExpectedTypeName => "expected type name",
            Self::ExpectedCaptureName => "expected capture name",
            Self::ExpectedFieldName => "expected field name",
            Self::ExpectedSubtype => "expected subtype name",
            Self::ExpectedPredicateValue => "expected string or regex after predicate operator",

            // Invalid syntax
            Self::EmptyTree => "empty `()` is not allowed",
            Self::EmptyAnonymousNode => "empty anonymous node",
            Self::EmptySequence => "empty `{}` is not allowed",
            Self::EmptyAlternation => "empty `[]` is not allowed",
            Self::BareIdentifier => "bare identifier is not valid",
            Self::InvalidSeparator => "unexpected separator",
            Self::AnchorInAlternation => "anchors cannot appear directly in alternations",
            Self::InvalidFieldEquals => "use `:` instead of `=`",
            Self::InvalidSupertypeSyntax => "references cannot have supertypes",
            Self::InvalidTypeAnnotationSyntax => "use `::` for type annotations",
            Self::ErrorTakesNoArguments => "`(ERROR)` cannot have children",
            Self::RefCannotHaveChildren => "references cannot have children",
            Self::ErrorMissingOutsideParens => "special node requires parentheses",
            Self::UnsupportedPredicate => "predicates are not supported",
            Self::UnexpectedToken => "unexpected token",
            Self::CaptureWithoutTarget => "capture has no target",
            Self::LowercaseBranchLabel => "branch label must start with uppercase",

            // Naming convention violations
            Self::CaptureNameHasDots => "capture names cannot contain `.`",
            Self::CaptureNameHasHyphens => "capture names cannot contain `-`",
            Self::CaptureNameUppercase => "capture names must be lowercase",
            Self::DefNameLowercase => "definition names must start uppercase",
            Self::DefNameHasSeparators => "definition names must be PascalCase",
            Self::BranchLabelHasSeparators => "branch labels must be PascalCase",
            Self::FieldNameHasDots => "field names cannot contain `.`",
            Self::FieldNameHasHyphens => "field names cannot contain `-`",
            Self::FieldNameUppercase => "field names must be lowercase",
            Self::TypeNameInvalidChars => "type names cannot contain `.` or `-`",
            Self::TreeSitterSequenceSyntax => "tree-sitter sequence syntax",
            Self::NegationSyntaxDeprecated => "deprecated negation syntax",

            // Semantic errors
            Self::DuplicateDefinition => "duplicate definition",
            Self::UndefinedReference => "undefined reference",
            Self::MixedAltBranches => "cannot mix labeled and unlabeled branches",
            Self::RecursionNoEscape => "infinite recursion: no escape path",
            Self::DirectRecursion => "infinite recursion: cycle consumes no input",
            Self::FieldSequenceValue => "field cannot match a sequence",
            Self::AnchorWithoutContext => "boundary anchor requires parent node context",

            // Type inference
            Self::IncompatibleTypes => "incompatible types",
            Self::MultiCaptureQuantifierNoName => {
                "quantified expression with multiple captures requires a struct capture"
            }
            Self::UnusedBranchLabels => "branch labels have no effect without capture",
            Self::StrictDimensionalityViolation => {
                "quantifier with captures requires a struct capture"
            }
            Self::MultiElementScalarCapture => {
                "cannot capture multi-element pattern as scalar array"
            }
            Self::UncapturedOutputWithCaptures => {
                "output-producing expression requires capture when siblings have captures"
            }
            Self::AmbiguousUncapturedOutputs => {
                "multiple expressions produce output without capture"
            }
            Self::DuplicateCaptureInScope => "duplicate capture in scope",
            Self::IncompatibleCaptureTypes => "incompatible capture types",
            Self::IncompatibleStructShapes => "incompatible struct shapes",

            // Predicate validation
            Self::PredicateOnNonLeaf => {
                "predicates match text content, but this node can contain children"
            }
            Self::EmptyRegex => "empty regex pattern",
            Self::RegexBackreference => "backreferences are not supported in regex",
            Self::RegexLookaround => "lookahead/lookbehind is not supported in regex",
            Self::RegexNamedCapture => "named captures are not supported in regex",
            Self::RegexSyntaxError => "invalid regex syntax",

            // Link pass - grammar validation
            Self::UnknownNodeType => "unknown node type",
            Self::UnknownField => "unknown field",
            Self::FieldNotOnNodeType => "field not valid on this node type",
            Self::InvalidFieldChildType => "node type not valid for this field",
            Self::InvalidChildType => "node type not valid as child",

            // Structural
            Self::UnnamedDef => "definition must be named",
        }
    }

    /// Template for custom messages. Contains `{}` placeholder for caller-provided detail.
    ///
    /// The placeholder is always the *last* `{}` in the returned string. The
    /// default template is built from `fallback_message()`, whose text may
    /// itself quote a literal `{}` (this is the case for `EmptySequence`), so
    /// an earlier `{}` is message text, not a placeholder. `UnnamedDef` is the
    /// one kind whose template carries no placeholder at all.
    pub fn custom_message(&self) -> String {
        match self {
            // Special formatting for references
            Self::RefCannotHaveChildren => {
                "`{}` is a reference and cannot have children".to_string()
            }
            Self::FieldSequenceValue => "field `{}` cannot match a sequence".to_string(),

            // Semantic errors with name context
            Self::DuplicateDefinition => "`{}` is already defined".to_string(),
            Self::UndefinedReference => "`{}` is not defined".to_string(),
            Self::IncompatibleTypes => "incompatible types: {}".to_string(),

            // Type inference errors with context
            Self::StrictDimensionalityViolation => "{}".to_string(),
            Self::MultiElementScalarCapture => "{}".to_string(),
            Self::DuplicateCaptureInScope => {
                "capture `@{}` already defined in this scope".to_string()
            }
            Self::IncompatibleCaptureTypes => {
                "capture `@{}` has incompatible types across branches".to_string()
            }
            Self::IncompatibleStructShapes => {
                "capture `@{}` has incompatible struct fields across branches".to_string()
            }

            // Link pass errors with context
            Self::UnknownNodeType => "`{}` is not a valid node type".to_string(),
            Self::UnknownField => "`{}` is not a valid field".to_string(),
            Self::FieldNotOnNodeType => "field `{}` is not valid on this node type".to_string(),
            Self::InvalidFieldChildType => "node type `{}` is not valid for this field".to_string(),
            Self::InvalidChildType => "`{}` cannot be a child of this node".to_string(),

            // Alternation mixing
            Self::MixedAltBranches => "cannot mix labeled and unlabeled branches: {}".to_string(),

            // Unclosed with context
            Self::UnclosedTree | Self::UnclosedSequence | Self::UnclosedAlternation => {
                format!("{}; {{}}", self.fallback_message())
            }

            // Type annotation specifics
            Self::InvalidTypeAnnotationSyntax => "use `::` for type annotations: {}".to_string(),

            // Named def (no custom message needed; suggestion goes in hint)
            Self::UnnamedDef => self.fallback_message().to_string(),

            // Standard pattern: fallback + context
            _ => format!("{}: {{}}", self.fallback_message()),
        }
    }

    /// Render the final message.
    ///
    /// - `None` → returns `fallback_message()`
    /// - `Some(detail)` → returns `custom_message()` with its placeholder replaced by detail
    ///
    /// Only the last `{}` of the template is substituted. A blanket
    /// `str::replace` would also overwrite a literal `{}` that belongs to the
    /// message text (the `EmptySequence` fallback quotes the empty braces),
    /// producing a garbled message with the detail inserted twice. A template
    /// without any placeholder is returned unchanged.
    pub fn message(&self, msg: Option<&str>) -> String {
        match msg {
            None => self.fallback_message().to_string(),
            Some(detail) => {
                const PLACEHOLDER: &str = "{}";
                let mut rendered = self.custom_message();
                if let Some(at) = rendered.rfind(PLACEHOLDER) {
                    rendered.replace_range(at..at + PLACEHOLDER.len(), detail);
                }
                rendered
            }
        }
    }
}

/// How serious a diagnostic is.
///
/// `Error` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Severity {
    /// Rendered as `error` by the `Display` impl.
    #[default]
    Error,
    /// Rendered as `warning` by the `Display` impl.
    Warning,
}

impl std::fmt::Display for Severity {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Severity::Error => write!(f, "error"),
            Severity::Warning => write!(f, "warning"),
        }
    }
}

/// A suggested fix that can be attached to a diagnostic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Fix {
    // NOTE(review): presumably the text to substitute at the diagnostic's
    // range - confirm against the code that applies fixes.
    pub(crate) replacement: String,
    /// Human-readable description; `DiagnosticMessage`'s `Display` impl
    /// prints it as `(fix: …)`.
    pub(crate) description: String,
}

impl Fix {
    /// Creates a fix from anything convertible into owned strings.
    ///
    /// `replacement` and `description` are stored as given, after conversion
    /// to `String`.
    pub fn new(replacement: impl Into<String>, description: impl Into<String>) -> Self {
        let replacement: String = replacement.into();
        let description: String = description.into();
        Self {
            replacement,
            description,
        }
    }
}

/// A secondary location with its own message, attached to a diagnostic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RelatedInfo {
    /// Source and range this note points at.
    pub(crate) span: Span,
    /// Text of the note; `DiagnosticMessage`'s `Display` impl prints it as
    /// `(related: … at start..end)`.
    pub(crate) message: String,
}

impl RelatedInfo {
    /// Creates a related note pointing at `range` inside `source`.
    ///
    /// The source and range are combined into a `Span`; `message` is
    /// converted into an owned `String`.
    pub fn new(source: SourceId, range: TextRange, message: impl Into<String>) -> Self {
        let span = Span::new(source, range);
        let message: String = message.into();
        Self { span, message }
    }
}

/// A single diagnostic: its kind, location, message text and optional extras
/// (fix, related locations, hints).
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct DiagnosticMessage {
    /// Category of the diagnostic; `severity()` is derived from it.
    pub(crate) kind: DiagnosticKind,
    /// Which source file this diagnostic belongs to.
    pub(crate) source: SourceId,
    /// The range shown to the user (underlined in output).
    pub(crate) range: TextRange,
    /// The range used for suppression logic. Errors within another error's
    /// suppression_range may be suppressed. Defaults to `range` but can be
    /// set to a parent context (e.g., enclosing tree span) for better cascading
    /// error suppression.
    pub(crate) suppression_range: TextRange,
    /// Message text; printed after the range by the `Display` impl.
    pub(crate) message: String,
    /// Optional suggested fix; `Display` prints its description as `(fix: …)`.
    pub(crate) fix: Option<Fix>,
    /// Additional locations, each printed as `(related: … at start..end)`.
    pub(crate) related: Vec<RelatedInfo>,
    /// Extra hints, each printed as `(hint: …)`.
    pub(crate) hints: Vec<String>,
}

impl DiagnosticMessage {
    pub(crate) fn new(
        source: SourceId,
        kind: DiagnosticKind,
        range: TextRange,
        message: impl Into<String>,
    ) -> Self {
        Self {
            kind,
            source,
            range,
            suppression_range: range,
            message: message.into(),
            fix: None,
            related: Vec::new(),
            hints: Vec::new(),
        }
    }

    pub(crate) fn with_default_message(
        source: SourceId,
        kind: DiagnosticKind,
        range: TextRange,
    ) -> Self {
        Self::new(source, kind, range, kind.fallback_message())
    }

    pub(crate) fn severity(&self) -> Severity {
        self.kind.default_severity()
    }

    pub(crate) fn is_error(&self) -> bool {
        self.severity() == Severity::Error
    }

    pub(crate) fn is_warning(&self) -> bool {
        self.severity() == Severity::Warning
    }
}

impl std::fmt::Display for DiagnosticMessage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{} at {}..{}: {}",
            self.severity(),
            u32::from(self.range.start()),
            u32::from(self.range.end()),
            self.message
        )?;
        if let Some(fix) = &self.fix {
            write!(f, " (fix: {})", fix.description)?;
        }
        for related in &self.related {
            write!(
                f,
                " (related: {} at {}..{})",
                related.message,
                u32::from(related.span.range.start()),
                u32::from(related.span.range.end())
            )?;
        }
        for hint in &self.hints {
            write!(f, " (hint: {})", hint)?;
        }
        Ok(())
    }
}