#![cfg_attr(feature = "strict_docs", allow(missing_docs))]
use crate::parser_v4::{ParseNode, Parser as CoreParser};
use crate::pure_incremental::Edit as CoreEdit;
use crate::pure_parser;
use adze_glr_core::ParseTable;
use adze_ir::Grammar;
use std::sync::Arc;
/// Owned result of a core parse: the root parse node, a copy of the parsed
/// bytes, and the number of errors recorded for the parse.
#[derive(Clone, Debug)]
pub(crate) struct OwnedCoreTree {
    /// Root node produced by the core parser.
    pub root: ParseNode,
    /// Bytes of the source text the tree was built from.
    pub source: Vec<u8>,
    /// Number of errors associated with this tree.
    pub error_count: usize,
}

impl OwnedCoreTree {
    /// Returns the raw numeric symbol of the root node.
    pub(crate) fn root_kind(&self) -> u16 {
        let root_symbol = &self.root.symbol;
        root_symbol.0
    }

    /// Returns the stored error count.
    pub(crate) fn error_count(&self) -> usize {
        let Self { error_count, .. } = self;
        *error_count
    }
}
/// A row/column position inside a source text.
///
/// The default value is `row: 0, column: 0`.
///
/// Points are ordered by `row` first and then by `column`, so they compare in
/// document order, and they are hashable so they can be used as set members
/// or map keys.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct Point {
    /// Row index of the position.
    pub row: u32,
    /// Column index of the position within its row.
    pub column: u32,
}

impl From<(u32, u32)> for Point {
    /// Builds a point from a `(row, column)` pair.
    fn from((row, column): (u32, u32)) -> Self {
        Self { row, column }
    }
}

impl From<Point> for (u32, u32) {
    /// Unpacks a point into a `(row, column)` pair.
    fn from(Point { row, column }: Point) -> Self {
        (row, column)
    }
}
/// Description of a text edit, expressed both as byte offsets and as
/// row/column positions.
///
/// Converts into the core incremental-parsing edit type via `From`.
#[derive(Clone, Debug, Default)]
pub struct InputEdit {
    /// Byte offset at which the edit begins.
    pub start_byte: usize,
    /// Byte offset of the end of the edited region before the edit.
    pub old_end_byte: usize,
    /// Byte offset of the end of the edited region after the edit.
    pub new_end_byte: usize,
    /// Row/column counterpart of `start_byte`.
    pub start_position: Point,
    /// Row/column counterpart of `old_end_byte`.
    pub old_end_position: Point,
    /// Row/column counterpart of `new_end_byte`.
    pub new_end_position: Point,
}

impl From<InputEdit> for CoreEdit {
    /// Copies every field across, translating each position into the core
    /// parser's own point type.
    fn from(e: InputEdit) -> Self {
        // Field-by-field translation between the two point representations.
        let to_core_point = |point: Point| pure_parser::Point {
            row: point.row,
            column: point.column,
        };

        let InputEdit {
            start_byte,
            old_end_byte,
            new_end_byte,
            start_position,
            old_end_position,
            new_end_position,
        } = e;

        CoreEdit {
            start_byte,
            old_end_byte,
            new_end_byte,
            start_point: to_core_point(start_position),
            old_end_point: to_core_point(old_end_position),
            new_end_point: to_core_point(new_end_position),
        }
    }
}
/// A named language: a grammar bundled with the parse table used to parse it.
#[derive(Clone, Debug)]
pub struct Language {
    /// Name of the language.
    pub name: String,
    /// Grammar definition.
    pub grammar: Grammar,
    /// Parse table paired with `grammar`.
    pub table: ParseTable,
}

impl Language {
    /// Creates a language from a name, a grammar and a parse table.
    ///
    /// `name` accepts anything convertible into a `String`.
    pub fn new(name: impl Into<String>, grammar: Grammar, table: ParseTable) -> Self {
        let name: String = name.into();
        Language {
            name,
            grammar,
            table,
        }
    }
}
/// Parser front-end that wraps the core parser behind a language-driven API.
///
/// A freshly created parser has no language; [`Parser::parse`] returns `None`
/// until [`Parser::set_language`] has been called.
#[derive(Default)]
pub struct Parser {
    /// Core parser instance, built when a language is set.
    core: Option<CoreParser>,
    /// Language most recently passed to `set_language`.
    lang: Option<Arc<Language>>,
}

impl Parser {
    /// Creates a parser with no language configured.
    pub fn new() -> Self {
        Self::default()
    }

    /// Installs `lang` and builds a core parser from clones of its grammar,
    /// parse table and name.
    ///
    /// The visible implementation always returns `Ok(())`.
    pub fn set_language(&mut self, lang: Arc<Language>) -> Result<(), String> {
        // Store the language first, then build the core parser from the stored handle.
        let installed = self.lang.insert(lang);
        let core_parser = CoreParser::new(
            installed.grammar.clone(),
            installed.table.clone(),
            installed.name.clone(),
        );
        self.core = Some(core_parser);
        Ok(())
    }

    /// Parses `source` and returns the resulting tree.
    ///
    /// Returns `None` when no language has been set or when the core parser
    /// reports an error. `_old` is currently unused.
    pub fn parse(&mut self, source: &str, _old: Option<&Tree>) -> Option<Tree> {
        let parser = self.core.as_mut()?;
        let language = self.lang.as_ref()?;
        // The core error value is discarded; callers only observe `None`.
        let root = parser.parse_tree(source).ok()?;

        let core = OwnedCoreTree {
            root,
            source: source.as_bytes().to_owned(),
            // NOTE(review): the error count is always recorded as zero here,
            // so trees from this method never report errors — confirm intended.
            error_count: 0,
        };
        Some(Tree {
            core,
            last_edit: None,
            language: Arc::clone(language),
        })
    }

    /// Returns the currently installed language, if any.
    pub fn language(&self) -> Option<&Arc<Language>> {
        self.lang.as_ref()
    }
}
/// A parsed syntax tree together with the language that produced it.
#[derive(Clone, Debug)]
pub struct Tree {
    /// Owned parse result (root node, source bytes, error count).
    pub(crate) core: OwnedCoreTree,
    /// Most recent edit recorded through [`Tree::edit`], if any.
    pub(crate) last_edit: Option<CoreEdit>,
    /// Language the tree was parsed with.
    pub(crate) language: Arc<Language>,
}

impl Tree {
    /// Records `edit` as the tree's most recent edit, replacing any earlier one.
    ///
    /// The tree contents themselves are not modified.
    pub fn edit(&mut self, edit: &InputEdit) {
        self.last_edit = Some(edit.clone().into());
    }

    /// Returns a node handle for the root of the tree.
    pub fn root_node(&self) -> Node<'_> {
        Node::new(self, 0)
    }

    /// Returns the rule name of the root node, or `"unknown"` if none is found.
    ///
    /// The root's raw symbol is first looked up directly as a symbol id in the
    /// grammar's rule names. If that misses, it is treated as an index into the
    /// parse table's `index_to_symbol` list and the resulting symbol is looked
    /// up instead.
    pub fn root_kind(&self) -> &str {
        let raw = self.core.root_kind();
        let rule_names = &self.language.grammar.rule_names;

        rule_names
            .get(&adze_ir::SymbolId(raw))
            .or_else(|| {
                self.language
                    .table
                    .index_to_symbol
                    .get(raw as usize)
                    .and_then(|sid| rule_names.get(sid))
            })
            .map_or("unknown", |name| name.as_str())
    }

    /// Returns the number of errors recorded for this tree.
    pub fn error_count(&self) -> usize {
        self.core.error_count()
    }

    /// Returns `true` when at least one error was recorded.
    pub fn has_errors(&self) -> bool {
        self.error_count() != 0
    }
}
#[derive(Debug, Clone)]
pub struct Node<'a> {
tree: &'a Tree,
index: usize,
byte_range: Option<(usize, usize)>,
position_range: Option<(Point, Point)>,
}
impl<'a> Node<'a> {
fn new(tree: &'a Tree, index: usize) -> Self {
let (byte_range, position_range) = Self::compute_ranges(tree, index);
Self {
tree,
index,
byte_range,
position_range,
}
}
#[allow(clippy::type_complexity)]
fn compute_ranges(
tree: &Tree,
index: usize,
) -> (Option<(usize, usize)>, Option<(Point, Point)>) {
if index == 0 {
let byte_end = tree.core.source.len();
let end_position = Self::byte_to_point(&tree.core.source, byte_end);
(
Some((0, byte_end)),
Some((Point { row: 0, column: 0 }, end_position)),
)
} else {
(None, None)
}
}
fn byte_to_point(source: &[u8], byte_pos: usize) -> Point {
let mut row = 0;
let mut column = 0;
for (i, &byte) in source.iter().enumerate() {
if i >= byte_pos {
break;
}
if byte == b'\n' {
row += 1;
column = 0;
} else {
column += 1;
}
}
Point { row, column }
}
pub fn kind(&self) -> &str {
if self.index == 0 {
self.tree.root_kind()
} else {
"unknown"
}
}
pub fn start_byte(&self) -> usize {
self.byte_range.map(|(start, _)| start).unwrap_or(0)
}
pub fn end_byte(&self) -> usize {
self.byte_range.map(|(_, end)| end).unwrap_or(0)
}
pub fn start_position(&self) -> Point {
self.position_range
.map(|(start, _)| start)
.unwrap_or_default()
}
pub fn end_position(&self) -> Point {
self.position_range.map(|(_, end)| end).unwrap_or_default()
}
pub fn child_count(&self) -> usize {
if self.index == 0 {
if !self.tree.core.source.is_empty() && self.tree.error_count() == 0 {
0 } else {
0
}
} else {
0
}
}
pub fn child(&self, index: usize) -> Option<Node<'a>> {
if index < self.child_count() {
None
} else {
None
}
}
pub fn is_error(&self) -> bool {
if self.index == 0 {
self.tree.error_count() > 0
} else {
false
}
}
pub fn is_missing(&self) -> bool {
if self.index == 0 {
self.tree.core.source.is_empty() && self.tree.error_count() > 0
} else {
false
}
}
pub fn byte_range(&self) -> std::ops::Range<usize> {
let (start, end) = self.byte_range.unwrap_or((0, 0));
start..end
}
pub fn utf8_text<'b>(&self, source: &'b [u8]) -> Result<&'b str, std::str::Utf8Error> {
let range = self.byte_range();
let slice = source.get(range).unwrap_or(&[]);
std::str::from_utf8(slice)
}
pub fn text(&self, source: &[u8]) -> String {
self.utf8_text(source).unwrap_or("").to_string()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use adze_glr_core::Action;
    use adze_ir::SymbolId;
    use std::collections::BTreeMap;

    /// Language built from a default grammar and a default parse table.
    fn empty_parse_table_language() -> Arc<Language> {
        let language = Language::new(
            "ts_compat_empty_parse_table",
            Grammar::default(),
            ParseTable::default(),
        );
        Arc::new(language)
    }

    /// Language whose parse table maps symbol 0 to index 0 and whose only
    /// action is `Accept`; everything else is left at its default.
    fn accept_on_eof_language() -> Arc<Language> {
        let symbol_to_index = BTreeMap::from([(SymbolId(0), 0)]);
        let action_table = vec![vec![vec![Action::Accept]]];
        let parse_table = ParseTable {
            symbol_to_index,
            action_table,
            ..Default::default()
        };
        let language = Language::new("ts_compat_accept_on_eof", Grammar::default(), parse_table);
        Arc::new(language)
    }

    /// Passing an old tree must not leak its source into the new tree.
    #[test]
    fn parse_ignores_old_tree_source() {
        let mut parser = Parser::new();
        parser.set_language(empty_parse_table_language()).unwrap();

        let previous = parser.parse("old", None).unwrap();
        let new_source = "incrementally updated";
        let current = parser.parse(new_source, Some(&previous)).unwrap();

        let root_text = current.root_node().text(new_source.as_bytes());
        assert_eq!(root_text, new_source);
        assert_eq!(current.core.source, new_source.as_bytes().to_vec());
        assert_ne!(current.core.source, previous.core.source);
    }

    /// With the accept-on-EOF language, parsing the input yields no tree.
    #[test]
    fn parse_returns_none_on_core_parse_error() {
        let mut parser = Parser::new();
        parser.set_language(accept_on_eof_language()).unwrap();

        let outcome = parser.parse("any input", None);
        assert!(outcome.is_none());
    }
}