tree-sitter-lean4 0.0.2

Tree-sitter grammar for Lean 4 (generates parser at build time)
Documentation
const attr = require('./grammar/attr.js')
const command = require('./grammar/command.js')
const PREC = require('./grammar/constants.js')
const tactic = require('./grammar/tactic.js')
const term = require('./grammar/term.js')
const {sep1, sep1_} = require('./grammar/util.js')

module.exports = grammar({
  name: 'lean',

  extras: $ => [
    $.comment,
    /\s/,
  ],

  externals: $ => [
    $._newline,
  ],

  // Significantly reduced conflicts for minimal grammar
  conflicts: $ => [
    [$._binder_ident, $._term],
    [$._binder_ident, $.named_argument],
    [$._binder_ident, $.subtype],
    [$._binder_ident],
    [$._have_id_decl, $._expression],
    [$._have_id_lhs, $._term],
    [$._have_id_lhs],
    [$._let_id_lhs, $._term],
    [$._let_id_lhs],
    [$._simple_binder],
    [$.identifier],
    [$.instance_binder, $._term],
    [$.instance_binder, $.list],
    [$.proj, $._expression],
    [$.have_tactic, $._have_id_lhs, $._term],
    [$.let_tactic, $._let_id_lhs, $._term],
  ],

  word: $ => $._identifier,

  rules: {
    // src/Lean/Parser/Module.lean
    module: $ => seq(
      optional($.prelude),
      repeat($.import),
      repeat($._command),
    ),
    prelude: $ => 'prelude',
    import: $ => seq('import', field('module', $.identifier)),

    parameters: $ => seq(
      repeat1(
        choice(
          field('name', $.identifier),
          $.hole,
          $._bracketed_binder,
          $.anonymous_constructor,
        )
      ),
    ),

    // Simplified expression - removed do, unless, quoted_tactic as primary choices
    _expression: $ => choice(
      $.apply,
      $.comparison,
      $.let,
      $.tactics,
      $.binary_expression,
      $.neg,
      $.fun,
      $._term,
    ),

    let: $ => prec.left(seq(
      'let',
      field('name', $.identifier),
      optional(field('parameters', $.parameters)),
      optional(seq(':', field('type', $._expression))),
      ':=',
      field('value', $._expression),
      choice($._newline, ';'),
      optional(field('body', $._expression)),
    )),

    fun: $ => prec.right(seq(
      choice('fun', 'λ'),
      choice(
        seq(
          $.parameters,
          '=>',
          $._expression,
        ),
        repeat(seq(
          '|',
          field('lhs', sep1($._expression, ',')),
          '=>',
          $._expression,
        )),
      ),
    )),

    apply: $ => choice($._apply, $._dollar),

    _apply: $ => prec(PREC.apply, seq(
      field('name', term.term.forbid($, 'match')),
      field('arguments', repeat1($._argument)),
    )),

    _dollar: $ => prec.right(PREC.dollar, seq(
      field('name', $._expression),
      '$',
      field('argument', $._expression),
    )),

    neg: $ => prec(PREC.unary, seq('-', $._expression)),

    // Simplified binary operators - collapsed to 3 main precedence groups
    binary_expression: $ => choice(
      // High precedence: power (right associative)
      prec.right(PREC.power, seq($._expression, '^', $._expression)),

      // Medium precedence: arithmetic (left associative)
      prec.left(PREC.times, seq($._expression, choice('*', '/', '%'), $._expression)),
      prec.left(PREC.plus, seq($._expression, choice('+', '-', '++', '::'), $._expression)),
      // Composition (right associative)
      prec.right(PREC.plus, seq($._expression, '', $._expression)),

      // Boolean equality (higher precedence than logical operators)
      prec.left(PREC.eqeq, seq($._expression, '==', $._expression)),

      // Logical operators (lower precedence)
      prec.left(PREC.or, seq($._expression, '||', $._expression)),
      prec.left(PREC.and, seq($._expression, choice('', '', '/\\', '\\/', '', '&&'), $._expression)),

      // Propositional equality (lowest precedence for logical expressions)
      prec.left(PREC.equal, seq($._expression, choice('=', ''), $._expression)),

      // Pipeline and monad operators
      prec.left(PREC.opop, seq($._expression, choice('|>', '|>.', '<|>', '>>', '>>=', '<*>', '<$>'), $._expression)),
      prec.right(PREC.dollar, seq($._expression, '<|', $._expression)),
    ),

    comparison: $ => prec.left(PREC.compare, seq(
      $._expression,
      choice('<', '>', '', '', '<=', '>=', ''),
      $._expression,
    )),

    comment: $ => token(choice(
      seq('--', /.*/),
      seq(
        '/-',
        repeat(choice(
          /[^-]/,
          /-[^/]/
        )),
        '-/',
      ),
    )),

    _identifier: $ => /[_a-zA-ZͰ-ϿĀ-ſ\U0001D400-\U0001D7FF][_`'`a-zA-Z0-9Ͱ-ϿĀ-ſ∇!?\u2070-\u209F\U0001D400-\U0001D7FF]*/,
    _escaped_identifier: $ =>  /«[^»]*»/,

    ...attr,
    ...command,
    ...tactic,
    ...term.rules,
  }
});