use std::fmt::{Debug, Display, Formatter};
use std::iter;
use std::iter::Cloned;
use std::marker::PhantomData;
use std::slice::Iter;
use std::str::{from_utf8, Utf8Error};
pub use syntax_kind::SyntaxKind;
use crate::cst::error_merger::ErrorMerger;
use crate::cst::SyntaxKind::{COMMENT, NEWLINE, WHITESPACE};
use crate::{Parser, Span};
pub(crate) mod error_merger;
pub(crate) mod syntax_kind;
pub(crate) mod syntax_stream;
#[cfg(test)]
mod tests;
/// A single event in the flat, stream-based representation of a concrete
/// syntax tree (CST).
///
/// NOTE(review): producers are presumably expected to emit balanced
/// `Begin`/`End` pairs; the tree builder in this file forwards them directly
/// to `start_node`/`finish_node` — confirm against the parser.
#[derive(Debug, PartialEq)]
pub enum Event {
/// Opens a node of the given `kind`.
Begin { kind: SyntaxKind, span: Span },
/// Closes a node of the given `kind`.
End { kind: SyntaxKind, span: Span },
/// A token of the given `kind`; `span` locates the token's bytes in the
/// source.
Token { kind: SyntaxKind, span: Span },
/// An error `message` associated with `span`.
Error { message: String, span: Span },
}
/// An iterator adaptor over CST [`Event`]s that can optionally drop
/// whitespace, newline and comment tokens.
pub struct CSTStream<'src, I>
where
I: Iterator<Item = Event>,
{
// Source bytes that the event spans refer to.
source: &'src [u8],
// Underlying events, wrapped in an `ErrorMerger`.
events: ErrorMerger<I>,
// When `false`, `WHITESPACE` tokens are skipped.
whitespaces: bool,
// When `false`, `NEWLINE` tokens are skipped.
newlines: bool,
// When `false`, `COMMENT` tokens are skipped.
comments: bool,
}
impl<'src, I> CSTStream<'src, I>
where
    I: Iterator<Item = Event>,
{
    /// Builds a stream over `events`, whose spans refer to `source`.
    ///
    /// Whitespaces, newlines and comments are all included by default.
    #[doc(hidden)]
    pub fn new(source: &'src [u8], events: I) -> Self {
        let events = ErrorMerger::new(events);
        Self {
            source,
            events,
            whitespaces: true,
            newlines: true,
            comments: true,
        }
    }

    /// Returns the source bytes this stream was created with.
    pub fn source(&self) -> &'src [u8] {
        self.source
    }

    /// Enables (`true`) or disables (`false`) whitespace tokens in the
    /// output of this stream.
    pub fn whitespaces(self, yes: bool) -> Self {
        let mut stream = self;
        stream.whitespaces = yes;
        stream
    }

    /// Enables (`true`) or disables (`false`) newline tokens in the output
    /// of this stream.
    pub fn newlines(self, yes: bool) -> Self {
        let mut stream = self;
        stream.newlines = yes;
        stream
    }

    /// Enables (`true`) or disables (`false`) comment tokens in the output
    /// of this stream.
    pub fn comments(self, yes: bool) -> Self {
        let mut stream = self;
        stream.comments = yes;
        stream
    }
}
impl<I> Iterator for CSTStream<'_, I>
where
    I: Iterator<Item = Event>,
{
    type Item = Event;

    /// Returns the next event, skipping whitespace, newline and comment
    /// tokens whose corresponding flag is disabled.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let event = self.events.next()?;
            // Only trivia tokens can be filtered out; every other event is
            // always passed through.
            let keep = match &event {
                Event::Token { kind: WHITESPACE, .. } => self.whitespaces,
                Event::Token { kind: NEWLINE, .. } => self.newlines,
                Event::Token { kind: COMMENT, .. } => self.comments,
                _ => true,
            };
            if keep {
                return Some(event);
            }
        }
    }
}
/// Iterator that turns an already-built [`CST`] back into a sequence of
/// [`Event`]s.
struct CSTIter<'a> {
// Pre-order traversal of the rowan tree, tokens included.
iter: rowan::api::PreorderWithTokens<YARA>,
// Errors stored in the CST; they are emitted once the traversal is done.
errors: Cloned<Iter<'a, (Span, String)>>,
}
impl<'a> Iterator for CSTIter<'a> {
type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
for event in self.iter.by_ref() {
match event {
rowan::WalkEvent::Enter(e) => {
return match e {
rowan::SyntaxElement::Node(node) => {
Some(Event::Begin {
kind: node.kind(),
span: Span::from(node.text_range()),
})
}
rowan::SyntaxElement::Token(token) => {
Some(Event::Token {
kind: token.kind(),
span: Span::from(token.text_range()),
})
}
}
}
rowan::WalkEvent::Leave(e) => {
if let rowan::SyntaxElement::Node(node) = e {
return Some(Event::End {
kind: node.kind(),
span: Span::from(node.text_range()),
});
}
}
}
}
if let Some((span, message)) = self.errors.next() {
return Some(Event::Error { message, span });
}
None
}
}
impl<'src> From<Parser<'src>> for CSTStream<'src, Parser<'src>> {
fn from(parser: Parser<'src>) -> Self {
CSTStream::new(parser.source(), parser)
}
}
/// Marker type that identifies the language for rowan's generic tree types
/// (see the `rowan::Language` implementation for this type).
// The lint is silenced because the type name is written in upper case.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct YARA();
/// Connects rowan's generic tree types with this crate's [`SyntaxKind`].
impl rowan::Language for YARA {
type Kind = SyntaxKind;
/// Converts the raw 16-bit kind stored by rowan back into a [`SyntaxKind`].
fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind {
// SAFETY: NOTE(review) — assumption to be confirmed against `syntax_kind`:
// this is only sound if `SyntaxKind` is `#[repr(u16)]` and `raw.0` is the
// discriminant of an existing variant, e.g. because the value was
// originally produced by `kind_to_raw`.
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
/// Converts a [`SyntaxKind`] into rowan's raw kind via its `Into` impl.
fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind {
kind.into()
}
}
/// A concrete syntax tree (CST) together with the errors collected while
/// it was built.
#[doc(hidden)]
pub struct CST {
// Root node of the rowan tree.
tree: rowan::SyntaxNode<YARA>,
// Errors received as `Event::Error`, in the order they were seen.
errors: Vec<(Span, String)>,
}
impl Debug for CST {
    /// Writes the pretty-printed tree, followed by an `ERRORS:` section
    /// listing every error when there is at least one.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:#?}", self.tree)?;
        if self.errors.is_empty() {
            return Ok(());
        }
        writeln!(f, "\nERRORS:")?;
        self.errors
            .iter()
            .try_for_each(|(span, err)| writeln!(f, "- {span}: {err}"))
    }
}
impl CST {
    /// Returns the root node of the tree as an immutable [`Node`].
    pub fn root(&self) -> Node<Immutable> {
        let tree = self.tree.clone();
        Node::new(tree)
    }

    /// Returns an iterator over the [`Event`]s that describe this CST: the
    /// tree events first, followed by the errors.
    pub fn iter(&self) -> impl Iterator<Item = Event> + '_ {
        let iter = self.tree.preorder_with_tokens();
        let errors = self.errors.iter().cloned();
        CSTIter { iter, errors }
    }
}
impl<'src> From<&'src str> for CST {
    /// Parses `src` and builds the CST for it.
    ///
    /// # Panics
    ///
    /// Panics if building the CST from the parser fails with a
    /// [`Utf8Error`].
    fn from(src: &'src str) -> Self {
        let parser = Parser::new(src.as_bytes());
        Self::try_from(parser).unwrap()
    }
}
impl TryFrom<Parser<'_>> for CST {
    type Error = Utf8Error;

    /// Builds a CST from the events produced by `parser`.
    ///
    /// # Errors
    ///
    /// Fails with [`Utf8Error`] if some token is not valid UTF-8.
    fn try_from(parser: Parser) -> Result<Self, Utf8Error> {
        let source = parser.source();
        let stream = CSTStream::new(source, parser);
        Self::try_from(stream)
    }
}
impl<'src, I> TryFrom<CSTStream<'src, I>> for CST
where
    I: Iterator<Item = Event>,
{
    type Error = Utf8Error;

    /// Consumes the stream and builds the rowan tree described by its
    /// events, collecting the errors found along the way.
    ///
    /// # Errors
    ///
    /// Fails with [`Utf8Error`] if the text of some token is not valid
    /// UTF-8.
    ///
    /// # Panics
    ///
    /// Panics if two consecutive tokens are not contiguous in the source,
    /// or if a token span lies outside the source.
    fn try_from(cst: CSTStream<'src, I>) -> Result<Self, Utf8Error> {
        let source = cst.source();
        let mut builder = rowan::GreenNodeBuilder::new();
        let mut errors = Vec::new();
        // Span of the most recently added token, used for detecting gaps.
        let mut last_span: Option<Span> = None;

        for event in cst {
            match event {
                Event::Begin { kind, .. } => builder.start_node(kind.into()),
                Event::End { .. } => builder.finish_node(),
                Event::Error { message, span } => errors.push((span, message)),
                Event::Token { kind, span } => {
                    if let Some(prev) = last_span.take() {
                        assert_eq!(
                            prev.end(),
                            span.start(),
                            "gap in the CST, one token ends at {} and the next one starts at {}",
                            prev.end(),
                            span.start(),
                        );
                    }
                    let bytes = source.get(span.range()).unwrap();
                    let text = from_utf8(bytes)?;
                    builder.token(kind.into(), text);
                    last_span = Some(span);
                }
            }
        }

        let tree = rowan::SyntaxNode::new_root(builder.finish());
        Ok(Self { tree, errors })
    }
}
/// Direction in which the siblings of a node are traversed.
///
/// The type derives the usual value-type traits so that it can be printed,
/// compared and passed around freely by callers.
#[doc(hidden)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Traverse the siblings that come after the node.
    Next,
    /// Traverse the siblings that come before the node.
    Prev,
}
/// The text of a [`Node`], a thin wrapper around rowan's `SyntaxText`.
///
/// The text is exposed as chunks (see the `*_chunks` methods) rather than
/// as a single string slice.
#[derive(PartialEq, Eq)]
#[doc(hidden)]
pub struct Text(rowan::SyntaxText);
impl Text {
#[inline]
pub fn len(&self) -> usize {
self.0.len().into()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
#[inline]
pub fn try_fold_chunks<T, F, E>(&self, init: T, f: F) -> Result<T, E>
where
F: FnMut(T, &str) -> Result<T, E>,
{
self.0.try_fold_chunks(init, f)
}
pub fn try_for_each_chunks<F, E>(&self, f: F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
{
self.0.try_for_each_chunk(f)
}
pub fn for_each_chunks<F>(&self, f: F)
where
F: FnMut(&str),
{
self.0.for_each_chunk(f)
}
}
impl Display for Text {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
impl Debug for Text {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Debug::fmt(&self.0, f)
}
}
impl PartialEq<Text> for str {
fn eq(&self, other: &Text) -> bool {
other.0 == self
}
}
impl PartialEq<Text> for &str {
    /// Compares a `&str` with a [`Text`] by delegating to the
    /// `Text == &str` comparison.
    fn eq(&self, other: &Text) -> bool {
        *other == *self
    }
}
impl PartialEq<&'_ str> for Text {
fn eq(&self, other: &&str) -> bool {
self.0 == *other
}
}
/// A text encoding, used for measuring strings in the units that are
/// natural for that encoding.
#[doc(hidden)]
pub trait Encoding {
    /// Returns the length of `s` in the code units of this encoding.
    fn len(s: &str) -> usize;
}

// The marker types below derive `Clone`, `Copy` and `Eq` in addition to
// `Debug` and `PartialEq`. Derived implementations on a generic type (like
// `Position<E>`) require the same trait from the type parameter, so without
// these derives such a type could not be cloned nor used where `Eq` is
// required.

/// Marker type for the UTF-8 encoding. Lengths are expressed in bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[doc(hidden)]
pub struct Utf8 {}

/// Marker type for the UTF-16 encoding. Lengths are expressed in 16-bit
/// code units.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[doc(hidden)]
pub struct Utf16 {}

/// Marker type for the UTF-32 encoding. Lengths are expressed in Unicode
/// scalar values (`char`s).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[doc(hidden)]
pub struct Utf32 {}

impl Encoding for Utf8 {
    /// Number of bytes in `s`.
    fn len(s: &str) -> usize {
        s.len()
    }
}

impl Encoding for Utf16 {
    /// Number of UTF-16 code units required for encoding `s`.
    fn len(s: &str) -> usize {
        s.encode_utf16().count()
    }
}

impl Encoding for Utf32 {
    /// Number of `char`s in `s`.
    fn len(s: &str) -> usize {
        s.chars().count()
    }
}
/// A position in the source code, expressed as a line and a column.
///
/// Both values are zero-based in the computations done in this file. The
/// column is measured in the units of the encoding `E` (see [`Encoding`]).
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Position<E: Encoding> {
/// Line number.
pub line: usize,
/// Column number, in units of the encoding `E`.
pub column: usize,
// Ties the position to its encoding without storing any data.
_encoding: PhantomData<E>,
}
impl<E: Encoding> From<(usize, usize)> for Position<E> {
#[inline]
fn from((line, column): (usize, usize)) -> Self {
Self { line, column, _encoding: PhantomData }
}
}
/// Marker type used as the `M` parameter of [`Node`], [`Token`] and
/// [`NodeOrToken`] for enabling the operations that modify the tree
/// (like `detach`).
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Mutable;
/// Marker type used as the `M` parameter of [`Node`], [`Token`] and
/// [`NodeOrToken`] for trees that expose read-only operations. An immutable
/// node can be converted with `Node::into_mut`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Immutable;
/// A token (leaf) in the CST. The `M` parameter is a marker type
/// ([`Mutable`] or [`Immutable`]) that selects the available operations.
#[derive(Clone, PartialEq, Eq)]
#[doc(hidden)]
pub struct Token<M> {
// The underlying rowan token.
inner: rowan::SyntaxToken<YARA>,
// Zero-sized marker carrying the mutability state.
_state: PhantomData<M>,
}
impl<M> Token<M> {
fn new(inner: rowan::SyntaxToken<YARA>) -> Self {
Self { inner, _state: PhantomData }
}
}
#[allow(clippy::len_without_is_empty)]
impl<M: Clone> Token<M> {
#[inline]
pub fn kind(&self) -> SyntaxKind {
self.inner.kind()
}
#[inline]
pub fn text(&self) -> &str {
self.inner.text()
}
#[inline]
pub fn span(&self) -> Span {
Span(self.inner.text_range().into())
}
#[inline]
pub fn len<E: Encoding>(&self) -> usize {
E::len(self.text())
}
pub fn start_pos<E: Encoding>(&self) -> Position<E> {
let mut line = 0;
let mut column = 0;
let mut prev_token = self.prev_token();
while let Some(token) = prev_token {
match token.kind() {
NEWLINE => line += 1,
COMMENT => {
if line == 0 {
let comment = token.text();
let last_line = match comment.rfind('\n') {
Some(idx) => &comment[idx + 1..],
None => comment, };
column += E::len(last_line);
}
line += token.text().chars().filter(|c| *c == '\n').count()
}
_ => {
if line == 0 {
column += token.len::<E>()
}
}
}
prev_token = token.prev_token();
}
Position::from((line, column))
}
pub fn end_pos<E: Encoding>(&self) -> Position<E> {
let token = self.text();
let start = self.start_pos::<E>();
match token.rfind('\n') {
Some(last_newline) => Position::from((
start.line + token.chars().filter(|c| *c == '\n').count(),
E::len(&token[last_newline + 1..]),
)),
None => {
Position::from((start.line, start.column + self.len::<E>()))
}
}
}
#[inline]
pub fn parent(&self) -> Option<Node<M>> {
self.inner.parent().map(Node::new)
}
#[inline]
pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
self.inner.parent_ancestors().map(Node::new)
}
#[inline]
pub fn prev_token(&self) -> Option<Token<M>> {
self.inner.prev_token().map(Token::new)
}
#[inline]
pub fn next_token(&self) -> Option<Token<M>> {
self.inner.next_token().map(Token::new)
}
#[inline]
pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.prev_sibling_or_token().map(|x| x.into())
}
#[inline]
pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.next_sibling_or_token().map(|x| x.into())
}
}
impl<M> Display for Token<M> {
    /// Formats the token exactly like the wrapped rowan token.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let token = &self.inner;
        Display::fmt(token, f)
    }
}
impl<M> Debug for Token<M> {
    /// Debug-formats the token exactly like the wrapped rowan token.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let token = &self.inner;
        Debug::fmt(token, f)
    }
}
impl Token<Mutable> {
    /// Detaches this token from its parent.
    #[inline]
    pub fn detach(&self) {
        self.inner.detach()
    }

    /// Replaces this token with a new token of the same kind that contains
    /// `text`.
    ///
    /// Returns a root node created from the green node produced by the
    /// replacement.
    pub fn replace(&mut self, text: &str) -> Node<Mutable> {
        let replacement = rowan::GreenToken::new(self.kind().into(), text);
        let green_root = self.inner.replace_with(replacement);
        Node::new(rowan::SyntaxNode::new_root(green_root))
    }
}
/// An element of the CST that is either a [`Node`] or a [`Token`].
#[doc(hidden)]
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum NodeOrToken<M> {
/// An inner node of the tree.
Node(Node<M>),
/// A token (leaf) of the tree.
Token(Token<M>),
}
impl<M: Clone> NodeOrToken<M> {
    /// Returns the kind of the node or token.
    pub fn kind(&self) -> SyntaxKind {
        match self {
            Self::Node(node) => node.kind(),
            Self::Token(token) => token.kind(),
        }
    }

    /// Returns the parent of the node or token, if any.
    pub fn parent(&self) -> Option<Node<M>> {
        match self {
            Self::Node(node) => node.parent(),
            Self::Token(token) => token.parent(),
        }
    }

    /// Returns the inner [`Node`], or `None` if this is a token.
    pub fn into_node(self) -> Option<Node<M>> {
        if let Self::Node(node) = self {
            Some(node)
        } else {
            None
        }
    }

    /// Returns the inner [`Token`], or `None` if this is a node.
    pub fn into_token(self) -> Option<Token<M>> {
        if let Self::Token(token) = self {
            Some(token)
        } else {
            None
        }
    }

    /// Returns an iterator over the ancestors of the node or token,
    /// starting with its parent.
    pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
        iter::successors(self.parent(), |node| node.parent())
    }

    /// Returns the previous sibling, which can be either a node or a token.
    pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
        match self {
            Self::Node(node) => node.prev_sibling_or_token(),
            Self::Token(token) => token.prev_sibling_or_token(),
        }
    }

    /// Returns the next sibling, which can be either a node or a token.
    pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
        match self {
            Self::Node(node) => node.next_sibling_or_token(),
            Self::Token(token) => token.next_sibling_or_token(),
        }
    }

    /// Returns the first child of a node. Tokens have no children, so the
    /// result is always `None` for them.
    pub fn first_child_or_token(&self) -> Option<NodeOrToken<M>> {
        if let Self::Node(node) = self {
            node.first_child_or_token()
        } else {
            None
        }
    }

    /// Returns the span of the node or token within the source code.
    pub fn span(&self) -> Span {
        match self {
            Self::Node(node) => node.span(),
            Self::Token(token) => token.span(),
        }
    }

    /// Returns the position where the node or token starts.
    pub fn start_pos<E: Encoding>(&self) -> Position<E> {
        match self {
            Self::Node(node) => node.start_pos::<E>(),
            Self::Token(token) => token.start_pos::<E>(),
        }
    }

    /// Returns the position where the node or token ends.
    pub fn end_pos<E: Encoding>(&self) -> Position<E> {
        match self {
            Self::Node(node) => node.end_pos::<E>(),
            Self::Token(token) => token.end_pos::<E>(),
        }
    }
}
impl NodeOrToken<Mutable> {
pub fn detach(&self) {
match self {
NodeOrToken::Node(n) => n.detach(),
NodeOrToken::Token(t) => t.detach(),
}
}
}
#[doc(hidden)]
impl<M> From<rowan::SyntaxElement<YARA>> for NodeOrToken<M> {
fn from(value: rowan::SyntaxElement<YARA>) -> Self {
match value {
rowan::SyntaxElement::Node(node) => Self::Node(Node::new(node)),
rowan::SyntaxElement::Token(token) => {
Self::Token(Token::new(token))
}
}
}
}
#[doc(hidden)]
impl<M> From<NodeOrToken<M>> for rowan::SyntaxElement<YARA> {
    /// Unwraps a [`NodeOrToken`] into the underlying rowan element.
    fn from(element: NodeOrToken<M>) -> Self {
        match element {
            NodeOrToken::Token(token) => Self::Token(token.inner),
            NodeOrToken::Node(node) => Self::Node(node.inner),
        }
    }
}
/// An inner node of the CST. The `M` parameter is a marker type
/// ([`Mutable`] or [`Immutable`]) that selects the available operations.
#[doc(hidden)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Node<M> {
// The underlying rowan node.
inner: rowan::SyntaxNode<YARA>,
// Zero-sized marker carrying the mutability state.
_mutability: PhantomData<M>,
}
impl<M> Node<M> {
fn new(inner: rowan::SyntaxNode<YARA>) -> Self {
Self { inner, _mutability: PhantomData }
}
}
impl<M: Clone> Node<M> {
#[inline]
pub fn kind(&self) -> SyntaxKind {
self.inner.kind()
}
#[inline]
pub fn text(&self) -> Text {
Text(self.inner.text())
}
#[inline]
pub fn span(&self) -> Span {
Span(self.inner.text_range().into())
}
#[inline]
pub fn start_pos<E: Encoding>(&self) -> Position<E> {
self.first_token().unwrap().start_pos()
}
#[inline]
pub fn end_pos<E: Encoding>(&self) -> Position<E> {
self.last_token().unwrap().end_pos()
}
#[inline]
pub fn parent(&self) -> Option<Node<M>> {
self.inner.parent().map(Node::new)
}
#[inline]
pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
iter::successors(self.parent(), Node::parent)
}
pub fn children(&self) -> Nodes<M> {
Nodes { inner: self.inner.children(), _mutability: PhantomData }
}
#[inline]
pub fn root(&self) -> Node<M> {
self.ancestors().last().unwrap_or_else(|| self.clone())
}
pub fn children_with_tokens(&self) -> NodesAndTokens<M> {
NodesAndTokens {
inner: self.inner.children_with_tokens(),
_mutability: PhantomData,
}
}
#[inline]
pub fn first_child(&self) -> Option<Node<M>> {
self.inner.first_child().map(Node::new)
}
#[inline]
pub fn last_child(&self) -> Option<Node<M>> {
self.inner.last_child().map(Node::new)
}
#[inline]
pub fn first_token(&self) -> Option<Token<M>> {
self.inner.first_token().map(Token::new)
}
#[inline]
pub fn last_token(&self) -> Option<Token<M>> {
self.inner.last_token().map(Token::new)
}
#[inline]
pub fn first_child_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.first_child_or_token().map(|x| x.into())
}
#[inline]
pub fn last_child_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.last_child_or_token().map(|x| x.into())
}
#[inline]
pub fn next_sibling(&self) -> Option<Node<M>> {
self.inner.next_sibling().map(Node::new)
}
#[inline]
pub fn prev_sibling(&self) -> Option<Node<M>> {
self.inner.prev_sibling().map(Node::new)
}
#[inline]
pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.next_sibling_or_token().map(|x| x.into())
}
#[inline]
pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.prev_sibling_or_token().map(|x| x.into())
}
pub fn siblings(
&self,
direction: Direction,
) -> impl Iterator<Item = Node<M>> {
let direction = match direction {
Direction::Next => rowan::Direction::Next,
Direction::Prev => rowan::Direction::Prev,
};
self.inner.siblings(direction).skip(1).map(Node::new)
}
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = NodeOrToken<M>> {
let direction = match direction {
Direction::Next => rowan::Direction::Next,
Direction::Prev => rowan::Direction::Prev,
};
self.inner.siblings_with_tokens(direction).skip(1).map(|x| x.into())
}
pub fn token_at_offset(&self, offset: usize) -> Option<Token<M>> {
if !self.span().range().contains(&offset) {
return None;
}
self.inner
.token_at_offset(offset.try_into().ok()?)
.right_biased()
.map(Token::new)
}
pub fn token_at_position<E: Encoding, P: Into<Position<E>>>(
&self,
position: P,
) -> Option<Token<M>> {
let position = position.into();
let mut line = 0;
let mut col = 0;
let mut next_token = self.root().first_token();
while let Some(token) = next_token {
let token_len = token.len::<E>();
if position.line == line
&& position.column >= col
&& position.column < col + token_len
{
return Some(token);
}
match token.kind() {
NEWLINE => {
line += 1;
col = 0;
}
COMMENT => {
let comment = token.text();
let newlines =
comment.chars().filter(|c| *c == '\n').count();
line += newlines;
if line > position.line {
return Some(token);
}
let last_line = match comment.rfind('\n') {
Some(idx) => &comment[idx + 1..],
None => comment, };
if newlines > 0 {
col = 0;
}
col += E::len(last_line);
if line == position.line && col > position.column {
return Some(token);
}
}
_ => {
col += token_len;
}
}
if line > position.line {
return None;
}
next_token = token.next_token();
}
None
}
}
impl Node<Immutable> {
    /// Converts this node into a mutable one, using rowan's
    /// `clone_for_update`.
    pub fn into_mut(self) -> Node<Mutable> {
        let updatable = self.inner.clone_for_update();
        Node::new(updatable)
    }
}
impl Node<Mutable> {
    /// Detaches this node from its parent.
    pub fn detach(&self) {
        let node = &self.inner;
        node.detach()
    }
}
/// Iterator over the child nodes of a [`Node`], as returned by
/// `Node::children`.
#[doc(hidden)]
pub struct Nodes<M> {
// The underlying rowan iterator.
inner: rowan::SyntaxNodeChildren<YARA>,
// Zero-sized marker carrying the mutability state of the yielded nodes.
_mutability: PhantomData<M>,
}
impl<M> Iterator for Nodes<M> {
    type Item = Node<M>;

    /// Returns the next child node, wrapped as a [`Node`].
    fn next(&mut self) -> Option<Self::Item> {
        let node = self.inner.next()?;
        Some(Node::new(node))
    }
}
/// Iterator over the children of a [`Node`], including both nodes and
/// tokens, as returned by `Node::children_with_tokens`.
#[doc(hidden)]
pub struct NodesAndTokens<M> {
// The underlying rowan iterator.
inner: rowan::SyntaxElementChildren<YARA>,
// Zero-sized marker carrying the mutability state of the yielded items.
_mutability: PhantomData<M>,
}
impl<M> Iterator for NodesAndTokens<M> {
type Item = NodeOrToken<M>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|x| x.into())
}
}