use std::fmt::{Debug, Display, Formatter};
use std::iter;
use std::marker::PhantomData;
use std::str::{from_utf8, Utf8Error};
use rowan::{GreenNodeBuilder, GreenToken, SyntaxNode};
use crate::cst::SyntaxKind::{COMMENT, NEWLINE, WHITESPACE};
use crate::{Parser, Span};
pub use syntax_kind::SyntaxKind;
pub(crate) mod syntax_kind;
pub(crate) mod syntax_stream;
#[cfg(test)]
mod tests;
/// An event in the flat stream that describes a concrete syntax tree.
///
/// A sequence of these events is what [`CST`]'s `TryFrom<CSTStream>`
/// implementation consumes in order to build the tree.
#[derive(Debug, PartialEq)]
pub enum Event {
    /// Opens a node of the given kind (the tree builder starts a new node).
    Begin { kind: SyntaxKind, span: Span },
    /// Closes the most recently opened node (the tree builder finishes it).
    End { kind: SyntaxKind, span: Span },
    /// A token of the given kind whose text is the source bytes at `span`.
    Token { kind: SyntaxKind, span: Span },
    /// An error message associated with a span of the source.
    Error { message: String, span: Span },
}
/// A stream of [`Event`]s over some source code, with optional filtering of
/// whitespace, newline and comment tokens.
pub struct CSTStream<'src, I>
where
    I: Iterator<Item = Event>,
{
    /// The source code the event spans refer to.
    source: &'src [u8],
    /// The underlying event iterator.
    events: I,
    /// When `false`, `WHITESPACE` tokens are dropped from the stream.
    whitespaces: bool,
    /// When `false`, `NEWLINE` tokens are dropped from the stream.
    newlines: bool,
    /// When `false`, `COMMENT` tokens are dropped from the stream.
    comments: bool,
}
impl<'src, I> CSTStream<'src, I>
where
    I: Iterator<Item = Event>,
{
    /// Creates a stream over `events` that refers to `source`.
    ///
    /// Whitespaces, newlines and comments are all enabled initially.
    #[doc(hidden)]
    pub fn new(source: &'src [u8], events: I) -> Self {
        let (whitespaces, newlines, comments) = (true, true, true);
        Self { source, events, whitespaces, newlines, comments }
    }

    /// Returns the source code associated with this stream.
    pub fn source(&self) -> &'src [u8] {
        self.source
    }

    /// Enables (`true`) or disables (`false`) whitespace tokens in the
    /// stream, returning the reconfigured stream.
    pub fn whitespaces(self, yes: bool) -> Self {
        Self { whitespaces: yes, ..self }
    }

    /// Enables (`true`) or disables (`false`) newline tokens in the stream,
    /// returning the reconfigured stream.
    pub fn newlines(self, yes: bool) -> Self {
        Self { newlines: yes, ..self }
    }

    /// Enables (`true`) or disables (`false`) comment tokens in the stream,
    /// returning the reconfigured stream.
    pub fn comments(self, yes: bool) -> Self {
        Self { comments: yes, ..self }
    }
}
impl<I> Iterator for CSTStream<'_, I>
where
    I: Iterator<Item = Event>,
{
    type Item = Event;

    /// Returns the next event, skipping whitespace, newline and comment
    /// tokens whose corresponding flag is disabled.
    ///
    /// Every other event (including all `Begin`, `End` and `Error` events)
    /// is passed through untouched. Returns `None` once the underlying
    /// iterator is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Fast path: nothing is filtered out. All three flags must be
        // checked here, otherwise disabling only comments would bypass the
        // filter below and comments would still be yielded.
        if self.whitespaces && self.newlines && self.comments {
            return self.events.next();
        }
        loop {
            let event = self.events.next()?;
            let keep = match &event {
                Event::Token { kind: WHITESPACE, .. } => self.whitespaces,
                Event::Token { kind: NEWLINE, .. } => self.newlines,
                Event::Token { kind: COMMENT, .. } => self.comments,
                _ => true,
            };
            if keep {
                return Some(event);
            }
        }
    }
}
impl<'src> From<Parser<'src>> for CSTStream<'src, Parser<'src>> {
fn from(parser: Parser<'src>) -> Self {
CSTStream::new(parser.source(), parser)
}
}
/// Marker type that implements [`rowan::Language`], tying rowan's generic
/// tree types to this crate's [`SyntaxKind`].
// The all-caps name trips clippy's acronym lint, hence the exception below.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct YARA();
/// Conversion between this crate's [`SyntaxKind`] and rowan's raw `u16` kind.
impl rowan::Language for YARA {
    type Kind = SyntaxKind;
    /// Reinterprets the raw `u16` carried by rowan as a [`SyntaxKind`].
    fn kind_from_raw(raw: rowan::SyntaxKind) -> SyntaxKind {
        // SAFETY (NOTE(review)): this is only sound if `SyntaxKind` is
        // `#[repr(u16)]` and `raw.0` is a valid discriminant, i.e. it was
        // produced by `kind_to_raw` — TODO confirm against the definition of
        // `SyntaxKind`, which is not visible from this file.
        unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
    }
    /// Converts a [`SyntaxKind`] into rowan's raw kind via its `Into` impl.
    fn kind_to_raw(kind: SyntaxKind) -> rowan::SyntaxKind {
        kind.into()
    }
}
/// A concrete syntax tree together with the errors reported while the
/// events that describe it were produced.
#[doc(hidden)]
pub struct CST {
    /// Root of the rowan syntax tree.
    tree: rowan::SyntaxNode<YARA>,
    /// Errors collected from `Event::Error` events, as `(span, message)`.
    errors: Vec<(Span, String)>,
}
impl Debug for CST {
    /// Writes the pretty-printed tree, followed by an `ERRORS:` section
    /// with one `- span: message` line per error when there are any.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:#?}", self.tree)?;
        // Nothing else to print when no errors were recorded.
        if self.errors.is_empty() {
            return Ok(());
        }
        writeln!(f, "\nERRORS:")?;
        self.errors
            .iter()
            .try_for_each(|(span, err)| writeln!(f, "- {span}: {err}"))
    }
}
impl CST {
    /// Returns the root node of the tree as an immutable [`Node`].
    ///
    /// The returned node wraps a clone of the underlying rowan node handle.
    pub fn root(&self) -> Node<Immutable> {
        Node::new(self.tree.clone())
    }
}
/// Builds a [`CST`] from a [`Parser`] by wrapping it in a [`CSTStream`]
/// (with nothing filtered out) and converting that stream.
impl TryFrom<Parser<'_>> for CST {
    type Error = Utf8Error;

    /// Fails with [`Utf8Error`] if the text of some token is not valid UTF-8.
    fn try_from(parser: Parser) -> Result<Self, Utf8Error> {
        // Grab the source before the parser is moved into the stream.
        let source = parser.source();
        let stream = CSTStream::new(source, parser);
        Self::try_from(stream)
    }
}
impl<'src, I> TryFrom<CSTStream<'src, I>> for CST
where
I: Iterator<Item = Event>,
{
type Error = Utf8Error;
fn try_from(cst: CSTStream<'src, I>) -> Result<Self, Utf8Error> {
let source = cst.source();
let mut builder = GreenNodeBuilder::new();
let mut prev_token_span: Option<Span> = None;
let mut errors = Vec::new();
for node in cst {
match node {
Event::Begin { kind, .. } => builder.start_node(kind.into()),
Event::End { .. } => builder.finish_node(),
Event::Token { kind, span } => {
if let Some(prev_token_span) = prev_token_span {
assert_eq!(
prev_token_span.end(),
span.start(),
"gap in the CST, one token ends at {} and the next one starts at {}",
prev_token_span.end(),
span.start(),
);
}
let token = source.get(span.range()).unwrap();
let token = from_utf8(token)?;
builder.token(kind.into(), token);
prev_token_span = Some(span);
}
Event::Error { message, span } => errors.push((span, message)),
}
}
Ok(Self {
tree: rowan::SyntaxNode::new_root(builder.finish()),
errors,
})
}
}
/// Direction in which siblings are traversed; mapped onto
/// `rowan::Direction` by the sibling iterators of [`Node`].
#[doc(hidden)]
pub enum Direction {
    /// Maps to `rowan::Direction::Next`.
    Next,
    /// Maps to `rowan::Direction::Prev`.
    Prev,
}
/// The text of a [`Node`]; a thin wrapper around `rowan::SyntaxText`.
#[derive(PartialEq, Eq)]
#[doc(hidden)]
pub struct Text(rowan::SyntaxText);
impl Text {
    /// Returns the length of the text, as reported by the wrapped
    /// `rowan::SyntaxText` and converted to `usize`.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len().into()
    }
    /// Returns `true` if the wrapped text is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
    /// Folds over the `&str` chunks of the text with a fallible closure.
    ///
    /// Delegates to `rowan::SyntaxText::try_fold_chunks`, starting from
    /// `init`.
    #[inline]
    pub fn try_fold_chunks<T, F, E>(&self, init: T, f: F) -> Result<T, E>
    where
        F: FnMut(T, &str) -> Result<T, E>,
    {
        self.0.try_fold_chunks(init, f)
    }
    /// Calls the fallible closure `f` on the `&str` chunks of the text.
    ///
    /// Delegates to `rowan::SyntaxText::try_for_each_chunk`.
    pub fn try_for_each_chunks<F, E>(&self, f: F) -> Result<(), E>
    where
        F: FnMut(&str) -> Result<(), E>,
    {
        self.0.try_for_each_chunk(f)
    }
    /// Calls `f` on the `&str` chunks of the text.
    ///
    /// Delegates to `rowan::SyntaxText::for_each_chunk`.
    pub fn for_each_chunks<F>(&self, f: F)
    where
        F: FnMut(&str),
    {
        self.0.for_each_chunk(f)
    }
}
/// Displays the text exactly as the wrapped `rowan::SyntaxText` does.
impl Display for Text {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Forward the formatter so that any formatting flags are honored
        // by the inner implementation.
        std::fmt::Display::fmt(&self.0, f)
    }
}
/// Debug-formats the text exactly as the wrapped `rowan::SyntaxText` does.
impl Debug for Text {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&self.0, f)
    }
}
/// Allows comparing a `str` against a [`Text`] (`str == Text`).
impl PartialEq<Text> for str {
    fn eq(&self, other: &Text) -> bool {
        // Relies on the comparison between `rowan::SyntaxText` and `&str`.
        other.0 == self
    }
}
/// Allows comparing a `&str` against a [`Text`] (`&str == Text`).
impl PartialEq<Text> for &str {
    fn eq(&self, other: &Text) -> bool {
        // Flips the operands and reuses `Text: PartialEq<&str>`.
        other == self
    }
}
/// Allows comparing a [`Text`] against a `&str` (`Text == &str`).
impl PartialEq<&'_ str> for Text {
    fn eq(&self, other: &&str) -> bool {
        // Relies on the comparison between `rowan::SyntaxText` and `&str`.
        self.0 == *other
    }
}
/// Type-level marker for [`Node`]s and [`Token`]s that expose mutating
/// operations such as `detach`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Mutable;
/// Type-level marker for read-only [`Node`]s and [`Token`]s. An immutable
/// node can be turned into a mutable one with `Node::into_mut`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Immutable;
/// A token in the CST, wrapping a `rowan::SyntaxToken`.
///
/// `M` is a marker type ([`Mutable`] or [`Immutable`]) that selects which
/// operations are available.
#[derive(PartialEq, Eq)]
#[doc(hidden)]
pub struct Token<M> {
    /// The wrapped rowan token.
    inner: rowan::SyntaxToken<YARA>,
    /// Zero-sized marker carrying the mutability parameter `M`.
    _state: PhantomData<M>,
}
impl<M> Token<M> {
    /// Wraps a rowan token; the marker `M` is chosen by the caller's
    /// expected type.
    fn new(inner: rowan::SyntaxToken<YARA>) -> Self {
        Self { inner, _state: PhantomData }
    }
}
/// Read-only accessors available on every token, regardless of `M`. All of
/// them delegate to the wrapped `rowan::SyntaxToken`.
impl<M> Token<M> {
    /// Returns the syntax kind of this token.
    #[inline]
    pub fn kind(&self) -> SyntaxKind {
        self.inner.kind()
    }
    /// Returns the text of this token.
    #[inline]
    pub fn text(&self) -> &str {
        self.inner.text()
    }
    /// Returns the span of this token, built from rowan's text range.
    #[inline]
    pub fn span(&self) -> Span {
        Span(self.inner.text_range().into())
    }
    /// Returns the node that contains this token, if any.
    #[inline]
    pub fn parent(&self) -> Option<Node<M>> {
        self.inner.parent().map(Node::new)
    }
    /// Returns an iterator over the ancestors of this token, as produced by
    /// rowan's `parent_ancestors`.
    pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
        self.inner.parent_ancestors().map(Node::new)
    }
    /// Returns the token before this one, as reported by rowan's
    /// `prev_token`, or `None` if there is none.
    #[inline]
    pub fn prev_token(&self) -> Option<Token<M>> {
        self.inner.prev_token().map(Token::new)
    }
    /// Returns the token after this one, as reported by rowan's
    /// `next_token`, or `None` if there is none.
    #[inline]
    pub fn next_token(&self) -> Option<Token<M>> {
        self.inner.next_token().map(Token::new)
    }
    /// Returns the previous sibling, which can be a node or a token.
    #[inline]
    pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
        self.inner.prev_sibling_or_token().map(|x| x.into())
    }
    /// Returns the next sibling, which can be a node or a token.
    #[inline]
    pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
        self.inner.next_sibling_or_token().map(|x| x.into())
    }
}
/// Displays the token exactly as the wrapped `rowan::SyntaxToken` does.
impl<M> Display for Token<M> {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.inner, f)
    }
}
/// Debug-formats the token exactly as the wrapped `rowan::SyntaxToken`
/// does. Implemented by hand so that no `M: Debug` bound is required.
impl<M> Debug for Token<M> {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&self.inner, f)
    }
}
/// Operations that are only available on mutable tokens.
impl Token<Mutable> {
    /// Detaches this token from its tree (delegates to rowan's `detach`).
    #[inline]
    pub fn detach(&self) {
        self.inner.detach()
    }
    /// Replaces this token with a new token of the same kind containing
    /// `text`, returning the root node of the resulting tree.
    ///
    /// The returned root is created from the green node produced by rowan's
    /// `replace_with`.
    // NOTE(review): the root is built with `SyntaxNode::new_root` but is
    // typed as `Node<Mutable>`; confirm that rowan allows mutating
    // operations on a tree created this way.
    pub fn replace(&mut self, text: &str) -> Node<Mutable> {
        Node::new(SyntaxNode::new_root(
            self.inner.replace_with(GreenToken::new(self.kind().into(), text)),
        ))
    }
}
/// Either a [`Node`] or a [`Token`]; the element type returned by the
/// traversal methods that do not distinguish between the two.
#[derive(PartialEq, Eq, Debug)]
#[doc(hidden)]
pub enum NodeOrToken<M> {
    /// An inner node of the tree.
    Node(Node<M>),
    /// A token.
    Token(Token<M>),
}
impl<M> NodeOrToken<M> {
pub fn kind(&self) -> SyntaxKind {
match self {
NodeOrToken::Node(n) => n.kind(),
NodeOrToken::Token(t) => t.kind(),
}
}
pub fn parent(&self) -> Option<Node<M>> {
match self {
NodeOrToken::Node(n) => n.parent(),
NodeOrToken::Token(t) => t.parent(),
}
}
pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
let first = match self {
NodeOrToken::Node(n) => n.parent(),
NodeOrToken::Token(t) => t.parent(),
};
iter::successors(first, Node::parent)
}
pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
match self {
NodeOrToken::Node(n) => n.prev_sibling_or_token(),
NodeOrToken::Token(t) => t.prev_sibling_or_token(),
}
}
pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
match self {
NodeOrToken::Node(n) => n.next_sibling_or_token(),
NodeOrToken::Token(t) => t.next_sibling_or_token(),
}
}
}
impl NodeOrToken<Mutable> {
pub fn detach(&self) {
match self {
NodeOrToken::Node(n) => n.detach(),
NodeOrToken::Token(t) => t.detach(),
}
}
}
/// Wraps a rowan syntax element into the corresponding [`NodeOrToken`]
/// variant.
#[doc(hidden)]
impl<M> From<rowan::SyntaxElement<YARA>> for NodeOrToken<M> {
    fn from(value: rowan::SyntaxElement<YARA>) -> Self {
        match value {
            rowan::SyntaxElement::Token(inner) => {
                Self::Token(Token::new(inner))
            }
            rowan::SyntaxElement::Node(inner) => Self::Node(Node::new(inner)),
        }
    }
}
/// Unwraps a [`NodeOrToken`] back into the rowan syntax element it holds.
#[doc(hidden)]
impl<M> From<NodeOrToken<M>> for rowan::SyntaxElement<YARA> {
    fn from(value: NodeOrToken<M>) -> Self {
        match value {
            NodeOrToken::Token(token) => Self::Token(token.inner),
            NodeOrToken::Node(node) => Self::Node(node.inner),
        }
    }
}
/// A node in the CST, wrapping a `rowan::SyntaxNode`.
///
/// `M` is a marker type ([`Mutable`] or [`Immutable`]) that selects which
/// operations are available.
#[derive(Clone, Debug, PartialEq, Eq)]
#[doc(hidden)]
pub struct Node<M> {
    /// The wrapped rowan node.
    inner: rowan::SyntaxNode<YARA>,
    /// Zero-sized marker carrying the mutability parameter `M`.
    _mutability: PhantomData<M>,
}
impl<M> Node<M> {
    /// Wraps a rowan node; the marker `M` is chosen by the caller's
    /// expected type.
    fn new(inner: rowan::SyntaxNode<YARA>) -> Self {
        Self { inner, _mutability: PhantomData }
    }
}
impl<M> Node<M> {
pub fn kind(&self) -> SyntaxKind {
self.inner.kind()
}
pub fn text(&self) -> Text {
Text(self.inner.text())
}
pub fn span(&self) -> Span {
Span(self.inner.text_range().into())
}
pub fn parent(&self) -> Option<Node<M>> {
self.inner.parent().map(Node::new)
}
pub fn ancestors(&self) -> impl Iterator<Item = Node<M>> {
iter::successors(self.parent(), Node::parent)
}
pub fn children(&self) -> Nodes<M> {
Nodes { inner: self.inner.children(), _mutability: PhantomData }
}
pub fn children_with_tokens(&self) -> NodesAndTokens<M> {
NodesAndTokens {
inner: self.inner.children_with_tokens(),
_mutability: PhantomData,
}
}
pub fn first_child(&self) -> Option<Node<M>> {
self.inner.first_child().map(Node::new)
}
pub fn last_child(&self) -> Option<Node<M>> {
self.inner.last_child().map(Node::new)
}
pub fn first_token(&self) -> Option<Token<M>> {
self.inner.first_token().map(Token::new)
}
pub fn last_token(&self) -> Option<Token<M>> {
self.inner.last_token().map(Token::new)
}
pub fn first_child_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.first_child_or_token().map(|x| x.into())
}
pub fn last_child_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.last_child_or_token().map(|x| x.into())
}
pub fn next_sibling(&self) -> Option<Node<M>> {
self.inner.next_sibling().map(Node::new)
}
pub fn prev_sibling(&self) -> Option<Node<M>> {
self.inner.prev_sibling().map(Node::new)
}
pub fn next_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.next_sibling_or_token().map(|x| x.into())
}
pub fn prev_sibling_or_token(&self) -> Option<NodeOrToken<M>> {
self.inner.prev_sibling_or_token().map(|x| x.into())
}
pub fn siblings(
&self,
direction: Direction,
) -> impl Iterator<Item = Node<M>> {
let direction = match direction {
Direction::Next => rowan::Direction::Next,
Direction::Prev => rowan::Direction::Prev,
};
self.inner.siblings(direction).skip(1).map(Node::new)
}
pub fn siblings_with_tokens(
&self,
direction: Direction,
) -> impl Iterator<Item = NodeOrToken<M>> {
let direction = match direction {
Direction::Next => rowan::Direction::Next,
Direction::Prev => rowan::Direction::Prev,
};
self.inner.siblings_with_tokens(direction).skip(1).map(|x| x.into())
}
pub fn token_at_offset(&self, offset: usize) -> Option<Token<M>> {
if !self.span().range().contains(&offset) {
return None;
}
self.inner
.token_at_offset(offset.try_into().ok()?)
.right_biased()
.map(Token::new)
}
}
impl Node<Immutable> {
    /// Consumes this immutable node and returns a mutable one, obtained
    /// from rowan's `clone_for_update`.
    pub fn into_mut(self) -> Node<Mutable> {
        Node::new(self.inner.clone_for_update())
    }
}
impl Node<Mutable> {
    /// Detaches this node from its tree (delegates to rowan's `detach`).
    pub fn detach(&self) {
        self.inner.detach()
    }
}
/// Iterator over the child nodes of a [`Node`], returned by
/// `Node::children`.
#[doc(hidden)]
pub struct Nodes<M> {
    /// The wrapped rowan iterator.
    inner: rowan::SyntaxNodeChildren<YARA>,
    /// Zero-sized marker carrying the mutability parameter `M`.
    _mutability: PhantomData<M>,
}
impl<M> Iterator for Nodes<M> {
    type Item = Node<M>;
    /// Returns the next child, wrapping the rowan node in a [`Node`].
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(Node::new)
    }
}
/// Iterator over the children (nodes and tokens) of a [`Node`], returned by
/// `Node::children_with_tokens`.
#[doc(hidden)]
pub struct NodesAndTokens<M> {
    /// The wrapped rowan iterator.
    inner: rowan::SyntaxElementChildren<YARA>,
    /// Zero-sized marker carrying the mutability parameter `M`.
    _mutability: PhantomData<M>,
}
impl<M> Iterator for NodesAndTokens<M> {
    type Item = NodeOrToken<M>;
    /// Returns the next child, converting the rowan element into a
    /// [`NodeOrToken`].
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next().map(|x| x.into())
    }
}