use std::collections::{BTreeMap, HashSet};
use std::path::Path;
use std::str::FromStr;
use onig::{RegSet, RegSetLead, Regex, Region, SearchOptions};
use crate::json;
use crate::raw::{RawCapture, RawGrammar, RawPattern, first_line_patterns};
use crate::util::{key, next_char_boundary, path_keys, trim_line_end};
use crate::{Error, MAX_INCLUDE_DEPTH};
mod end;
mod pattern;
mod tokenize;
pub use tokenize::LineTokenizer;
use pattern::{OpenRule, PatternSet, ScopeInterner};
pub(crate) const PLAIN_TEXT_NAME: &str = "Plain Text";
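/// An index into [`Grammar::scopes`], identifying an interned scope name.
/// The scope string can be recovered with `grammar.scopes[id.index()]`.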
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ScopeId(usize);
impl ScopeId {
    pub const fn index(self) -> usize {
        self.0
    }
    pub(crate) const fn new(index: usize) -> Self {
        Self(index)
    }
}
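/// A scope assignment covering `start..end` within a tokenized line
/// (the offsets are assumed here to be byte indices into the line passed to
/// [`Grammar::tokenize_line`]).
///
/// A minimal sketch of consuming a span (`line`, `span`, and `grammar` are
/// illustrative):
///
/// ```ignore
/// let text = &line[span.start..span.end];
/// let scope = &grammar.scopes[span.scope.index()];
/// println!("{scope}: {text}");
/// ```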
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ScopeSpan {
    pub start: usize,
    pub end: usize,
    pub scope: ScopeId,
}
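/// Tokenizer state carried from one line to the next: the stack of rules
/// still open at the end of the previous line. Start each document with
/// `LineState::default()` and pass the same value to successive
/// [`Grammar::tokenize_line`] calls.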
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct LineState {
    stack: Vec<OpenRule>,
}
impl LineState {
    pub fn is_empty(&self) -> bool {
        self.stack.is_empty()
    }
    pub fn depth(&self) -> usize {
        self.stack.len()
    }
}
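/// A compiled grammar: either a TextMate grammar compiled from a
/// `RawGrammar` or one of the built-in fallbacks ([`Grammar::plain_text`],
/// [`Grammar::json`]).
///
/// A minimal usage sketch, assuming `src` holds a grammar definition and
/// `text` the document to highlight (both names are illustrative):
///
/// ```ignore
/// let grammar: Grammar = src.parse()?;
/// let mut state = LineState::default();
/// for line in text.lines() {
///     for span in grammar.tokenize_line(&mut state, line) {
///         let scope = &grammar.scopes[span.scope.index()];
///         println!("{}..{} {scope}", span.start, span.end);
///     }
/// }
/// ```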
#[derive(Debug)]
pub struct Grammar {
    pub name: String,
    pub scope_name: String,
    pub scopes: Vec<String>,
    pub kind: GrammarKind,
    file_types: HashSet<String>,
    first_line_match: Vec<Regex>,
    patterns: PatternSet,
    rule_paths: Vec<Box<[usize]>>,
}
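/// Distinguishes grammars compiled from TextMate definitions from the
/// built-in JSON grammar.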
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GrammarKind {
    TextMate,
    Json,
}
impl Grammar {
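    /// Compiles a parsed `RawGrammar` into a ready-to-use [`Grammar`].
    ///
    /// A hedged sketch of going from raw text to a compiled grammar
    /// (`input` is illustrative):
    ///
    /// ```ignore
    /// let raw = RawGrammar::from_str(input)?;
    /// let grammar = Grammar::compile(&raw)?;
    /// ```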
    pub fn compile(raw: &RawGrammar) -> Result<Self, Error> {
        let mut next_rule = 0;
        let mut interner = ScopeInterner::default();
        // Compile the top-level pattern list, assigning rule ids and interning scope names.
        let patterns = PatternSet::compile(raw, &raw.patterns, &mut next_rule, 0, &mut interner)?;
        // Compile the optional first-line-match expressions used for content-based detection.
        let first_line_match = first_line_patterns(raw.first_line_match.as_ref())
            .iter()
            .map(|pattern| Regex::new(pattern).map_err(|_| Error::InvalidRegex(pattern.clone())))
            .collect::<Result<Vec<_>, _>>()?;
        // Record, for every rule id, the path to its pattern inside the compiled set.
        let rule_paths = patterns.collect_rule_paths(next_rule);
        Ok(Self {
            name: raw.display_name.clone().unwrap_or_else(|| raw.name.clone()),
            scope_name: raw.scope_name.clone(),
            // Declared file types plus the grammar name itself, normalized through `key`.
            file_types: raw
                .file_types
                .as_deref()
                .unwrap_or_default()
                .iter()
                .chain(std::iter::once(&raw.name))
                .map(|item| key(item))
                .collect(),
            first_line_match,
            patterns,
            rule_paths,
            scopes: interner.scopes,
            kind: GrammarKind::TextMate,
        })
    }
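    /// Builds the built-in "Plain Text" fallback grammar, which carries no
    /// compiled patterns and no scope names.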
    pub fn plain_text() -> Self {
        Self {
            name: PLAIN_TEXT_NAME.to_owned(),
            scope_name: "text.plain".to_owned(),
            file_types: ["txt".to_owned(), "text".to_owned()].into_iter().collect(),
            first_line_match: Vec::new(),
            patterns: PatternSet::empty(),
            rule_paths: Vec::new(),
            scopes: Vec::new(),
            kind: GrammarKind::TextMate,
        }
    }
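    /// Builds the built-in JSON grammar: its scope list comes from
    /// `json::SCOPES`, and its `kind` is [`GrammarKind::Json`], so it
    /// carries no compiled patterns.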
    pub fn json() -> Self {
        Self {
            name: "JSON".to_owned(),
            scope_name: "source.json".to_owned(),
            file_types: ["json".to_owned()].into_iter().collect(),
            first_line_match: Vec::new(),
            patterns: PatternSet::empty(),
            rule_paths: Vec::new(),
            scopes: json::SCOPES
                .iter()
                .map(|scope| (*scope).to_owned())
                .collect(),
            kind: GrammarKind::Json,
        }
    }
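    /// Returns `true` if `name` matches this grammar's display name, its
    /// scope name, or one of its registered file types (via the normalized
    /// `key`).
    ///
    /// Illustrative sketch, assuming a Rust grammar whose file types
    /// include `rs`:
    ///
    /// ```ignore
    /// assert!(rust.matches_name("Rust"));
    /// assert!(rust.matches_name("source.rust"));
    /// assert!(rust.matches_name("rs"));
    /// ```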
    pub fn matches_name(&self, name: &str) -> bool {
        name.eq_ignore_ascii_case(&self.name)
            || name.eq_ignore_ascii_case(&self.scope_name)
            || self.file_types.contains(&key(name))
    }
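    /// Returns `true` if any lookup key derived from `path` via `path_keys`
    /// (presumably the file name and extension) is a registered file type.
    ///
    /// Sketch, reusing the illustrative Rust grammar from
    /// [`Self::matches_name`]:
    ///
    /// ```ignore
    /// use std::path::Path;
    /// assert!(rust.matches_path(Path::new("src/main.rs")));
    /// ```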
    pub fn matches_path(&self, path: &Path) -> bool {
        path_keys(path).any(|key| self.file_types.contains(&key))
    }
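    /// Returns `true` if any of the grammar's first-line-match patterns
    /// matches `line`; this hook is commonly used for shebang-style
    /// detection.
    ///
    /// ```ignore
    /// // Assuming an illustrative shell grammar whose first-line pattern
    /// // covers shebangs.
    /// assert!(shell.matches_first_line("#!/usr/bin/env bash"));
    /// ```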
    pub fn matches_first_line(&self, line: &str) -> bool {
        self.first_line_match
            .iter()
            .any(|regex| regex.find(line).is_some())
    }
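    /// Tokenizes one line, returning the scope spans it produced and
    /// updating `state` so the following line starts from the correct stack
    /// of open rules.
    ///
    /// A minimal sketch of feeding consecutive lines (the grammar and the
    /// lines are illustrative):
    ///
    /// ```ignore
    /// let mut state = LineState::default();
    /// let first = grammar.tokenize_line(&mut state, "fn main() {");
    /// let second = grammar.tokenize_line(&mut state, "}");
    /// ```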
    pub fn tokenize_line(&self, state: &mut LineState, line: &str) -> Vec<ScopeSpan> {
        let mut spans = Vec::new();
        self.tokenize_line_into(state, line, &mut spans);
        spans
    }
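    /// Like [`Self::tokenize_line`], but writes into a caller-provided
    /// buffer so the allocation can be reused across lines (the sketch
    /// below clears it between lines).
    ///
    /// ```ignore
    /// let mut state = LineState::default();
    /// let mut spans = Vec::new();
    /// for line in text.lines() {
    ///     spans.clear();
    ///     grammar.tokenize_line_into(&mut state, line, &mut spans);
    ///     // consume `spans` for this line
    /// }
    /// ```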
    pub fn tokenize_line_into(
        &self,
        state: &mut LineState,
        line: &str,
        spans: &mut Vec<ScopeSpan>,
    ) {
        LineTokenizer::new(self).tokenize_line_into(state, line, spans);
    }
    fn pattern_by_rule(&self, rule_id: usize) -> Option<&pattern::Pattern> {
        let path = self.rule_paths.get(rule_id)?;
        self.patterns.pattern_at(path)
    }
}
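/// Parses a raw grammar definition (in whatever formats `RawGrammar`
/// accepts) and compiles it in one step.
///
/// ```ignore
/// // `grammar_source` is an illustrative &str holding a grammar definition.
/// let grammar: Grammar = grammar_source.parse()?;
/// ```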
impl FromStr for Grammar {
    type Err = Error;
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let raw = RawGrammar::from_str(input)?;
        Self::compile(&raw)
    }
}