relex 1.0.0

A library for building regex-based lexers.
use std::fmt::Debug;

/// You must implement this trait for your own custom token kinds.
/// For example:
///
/// ```rust
/// use relex::TokenKind;
///
/// #[derive(Debug, Clone, PartialEq)]
/// enum MyToken {
///   Whitespace,
///   ID,
///   Eof,
///   Unrecognized,
/// }
/// impl TokenKind for MyToken {
///   fn eof() -> Self { MyToken::Eof }
///   fn unrecognized() -> Self { MyToken::Unrecognized }
/// }
/// ```
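///
/// The default `is_eof`/`is_unrecognized` helpers simply compare against these
/// constructors, so (as a quick sketch, reusing the `MyToken` type above):
///
/// ```rust
/// # use relex::TokenKind;
/// # #[derive(Debug, Clone, PartialEq)]
/// # enum MyToken { Whitespace, ID, Eof, Unrecognized }
/// # impl TokenKind for MyToken {
/// #   fn eof() -> Self { MyToken::Eof }
/// #   fn unrecognized() -> Self { MyToken::Unrecognized }
/// # }
/// assert!(MyToken::Eof.is_eof());
/// assert!(MyToken::Unrecognized.is_unrecognized());
/// assert!(!MyToken::ID.is_eof());
/// ```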
pub trait TokenKind: Clone + PartialEq {
  /// Constructs a TokenKind denoting that the token of interest is "unrecognized"
  /// (i.e., unmatched by any given regex)
  fn unrecognized() -> Self;
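  /// Returns `true` if this kind is the "unrecognized" sentinel.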
  fn is_unrecognized(&self) -> bool {
    self == &Self::unrecognized()
  }

  /// Constructs a TokenKind denoting that the token of interest is at the end of the input
  fn eof() -> Self;
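  /// Returns `true` if this kind is the end-of-input sentinel.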
  fn is_eof(&self) -> bool {
    self == &Self::eof()
  }
}

/// Represents a detected token
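///
/// A rough usage sketch (assuming `Token` and `TokenKind` are both exported at
/// the crate root, and reusing the `MyToken` kind from the `TokenKind` docs):
///
/// ```rust
/// # use relex::{Token, TokenKind};
/// # #[derive(Debug, Clone, PartialEq)]
/// # enum MyToken { Whitespace, ID, Eof, Unrecognized }
/// # impl TokenKind for MyToken {
/// #   fn eof() -> Self { MyToken::Eof }
/// #   fn unrecognized() -> Self { MyToken::Unrecognized }
/// # }
/// let source = "  foo";
///
/// // `start`/`end` are byte offsets into the source string.
/// let ws = Token::new(source, MyToken::Whitespace, 0, 2).skip(true);
/// let id = Token::new(source, MyToken::ID, 2, 5).skipped(vec![ws]);
/// assert_eq!(id.text, "foo");
/// assert_eq!(id.skipped[0].text, "  ");
///
/// // An EOF token sits at the end of the source and carries empty text.
/// let eof = Token::<MyToken>::eof(source);
/// assert!(eof.kind.is_eof());
/// assert_eq!((eof.start, eof.end), (5, 5));
/// ```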
#[derive(Debug, PartialEq)]
pub struct Token<'a, K: TokenKind> {
  /// The kind of token that was matched.
  pub kind: K,
  /// The start byte offset of the token within the source string.
  pub start: usize,
  /// The end byte offset of the token within the source string.
  pub end: usize,
  /// The matched text.
  pub text: &'a str,
  /// Whether this token is marked to be skipped.
  pub skip: bool,
  /// In some cases (viz. when an "unrecognized" token is returned),
  /// whatever token comes next is cached here.
  pub next: Option<Box<Token<'a, K>>>,
  /// The tokens skipped over to get here.
  pub skipped: Vec<Token<'a, K>>,
  /// Capture group spans as `(start, end)` byte offsets relative to `text`;
  /// `None` marks a group that did not participate in the match.
  pub captures: Option<Vec<Option<(usize, usize)>>>,
}
impl<'a, K: TokenKind> Token<'a, K> {
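  /// Builds a token of `kind` spanning the byte range `start..end` of `source`;
  /// the token text is sliced out of `source`.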
  pub fn new(source: &'a str, kind: K, start: usize, end: usize) -> Token<'a, K> {
    Token {
      kind,
      start,
      end,
      text: &source[start..end],
      skip: false,
      skipped: vec![],
      next: None,
      captures: None,
    }
  }
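
  /// Builds a token directly from its text; `end` is computed as `start + text.len()`.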
  pub fn from_text(kind: K, text: &'a str, start: usize) -> Token<'a, K> {
    Token {
      kind,
      start,
      end: start + text.len(),
      text,
      skip: false,
      skipped: vec![],
      next: None,
      captures: None,
    }
  }

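  /// Builds an end-of-input token positioned at `source.len()`, with empty text.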
  pub fn eof(source: &'a str) -> Token<'a, K> {
    Token {
      kind: K::eof(),
      start: source.len(),
      end: source.len(),
      text: "",
      skip: false,
      next: None,
      skipped: vec![],
      captures: None,
    }
  }

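  /// Sets whether this token should be marked as skipped; returns `self` for chaining.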
  pub fn skip(mut self, skip: bool) -> Self {
    self.skip = skip;
    self
  }

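  /// Attaches the tokens that were skipped over to reach this token; returns `self` for chaining.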
  pub fn skipped(mut self, skipped: Vec<Token<'a, K>>) -> Self {
    self.skipped = skipped;
    self
  }

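  /// Caches the token that follows this one (used when an "unrecognized" token is
  /// returned); returns `self` for chaining.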
  pub fn next(mut self, next: Option<Box<Token<'a, K>>>) -> Self {
    self.next = next;
    self
  }

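  /// Attaches capture group spans as `(start, end)` offsets relative to the token
  /// text; returns `self` for chaining.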
  pub fn captures(mut self, captures: Option<Vec<Option<(usize, usize)>>>) -> Self {
    self.captures = captures;
    self
  }

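  /// Looks up the `idx`-th capture recorded on this token and, if that group
  /// matched, returns its text plus offsets relative to the token text.
  /// Panics if captures were recorded but `idx` is out of range.
  ///
  /// A rough sketch (the capture range here is hypothetical, and `Token` is
  /// assumed to be exported at the crate root):
  ///
  /// ```rust
  /// # use relex::{Token, TokenKind};
  /// # #[derive(Debug, Clone, PartialEq)]
  /// # enum MyToken { Num, Eof, Unrecognized }
  /// # impl TokenKind for MyToken {
  /// #   fn eof() -> Self { MyToken::Eof }
  /// #   fn unrecognized() -> Self { MyToken::Unrecognized }
  /// # }
  /// // Suppose a pattern captured the digits inside "abc123".
  /// let token = Token::from_text(MyToken::Num, "abc123", 0)
  ///   .captures(Some(vec![Some((3, 6)), None]));
  ///
  /// let cap = token.get_capture(0).expect("group 0 matched");
  /// assert_eq!(cap.text, "123");
  /// assert_eq!((cap.start, cap.end), (3, 6));
  ///
  /// // A group that did not participate in the match yields `None`.
  /// assert!(token.get_capture(1).is_none());
  /// ```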
  pub fn get_capture(&self, idx: usize) -> Option<TokenCapture<'a>> {
    self
      .captures
      .as_ref()
      .and_then(|captures| captures[idx])
      .map(|(start, end)| TokenCapture {
        text: &self.text[start..end],
        start,
        end,
      })
  }
}

/// Represents the information for a single capture group within a token.
#[derive(Debug, PartialEq)]
pub struct TokenCapture<'a> {
  /// The captured text.
  pub text: &'a str,

  /// The start index, relative to the _token_ text (i.e., not absolute within the source string).
  pub start: usize,

  /// The end index, relative to the _token_ text (i.e., not absolute within the source string).
  pub end: usize,
}

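// Test-only convenience: lets plain string literals serve as token kinds in unit tests.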
#[cfg(test)]
impl TokenKind for &'static str {
  fn unrecognized() -> Self {
    "UNRECOGNIZED"
  }
  fn eof() -> Self {
    "EOF"
  }
}