1use std::collections::{BTreeMap, HashSet};
9use std::path::Path;
10use std::str::FromStr;
11
12use onig::{RegSet, RegSetLead, Regex, Region, SearchOptions};
13
14use crate::json;
15use crate::raw::{RawCapture, RawGrammar, RawPattern, first_line_patterns};
16use crate::util::{key, next_char_boundary, path_keys, trim_line_end};
17use crate::{Error, MAX_INCLUDE_DEPTH};
18
19mod end;
20mod pattern;
21mod tokenize;
22
23pub use tokenize::LineTokenizer;
24
25use pattern::{OpenRule, PatternSet, ScopeInterner};
26
27pub(crate) const PLAIN_TEXT_NAME: &str = "Plain Text";
29
/// Opaque identifier for an interned scope name.
///
/// Wraps the index assigned by the interner at grammar-compile time;
/// `index` recovers it for lookups in the owning grammar's scope table.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ScopeId(usize);

impl ScopeId {
    /// Wraps a raw interner index. Crate-internal: only grammar
    /// compilation is allowed to mint ids.
    pub(crate) const fn new(index: usize) -> Self {
        Self(index)
    }

    /// Returns the raw index this id wraps.
    pub const fn index(self) -> usize {
        self.0
    }
}
48
/// A contiguous region of a tokenized line labelled with one scope.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ScopeSpan {
    /// Offset where the span starts.
    /// NOTE(review): appears to be a byte offset into the line (the crate
    /// imports `next_char_boundary`) — confirm against `LineTokenizer`.
    pub start: usize,
    /// Offset where the span ends (presumably exclusive — verify in the
    /// tokenizer before relying on it).
    pub end: usize,
    /// The scope assigned to this region.
    pub scope: ScopeId,
}
59
60#[derive(Clone, Debug, Default, Eq, PartialEq)]
62pub struct LineState {
63 stack: Vec<OpenRule>,
65}
66
67impl LineState {
68 pub fn is_empty(&self) -> bool {
70 self.stack.is_empty()
71 }
72
73 pub fn depth(&self) -> usize {
75 self.stack.len()
76 }
77}
78
/// A compiled grammar, ready to tokenize lines of text.
#[derive(Debug)]
pub struct Grammar {
    /// Human-readable name (the raw grammar's display name when present,
    /// otherwise its plain name — see `compile`).
    pub name: String,
    /// Root scope name, e.g. `"source.json"`.
    pub scope_name: String,
    /// Interned scope names; presumably indexed by `ScopeId::index` —
    /// TODO(review): confirm against `ScopeInterner`.
    pub scopes: Vec<String>,
    /// Which tokenization strategy this grammar uses.
    pub kind: GrammarKind,
    /// Lookup keys (normalized via `key`) matched against file names,
    /// extensions, and grammar names.
    file_types: HashSet<String>,
    /// Regexes tried against a file's first line for grammar detection.
    first_line_match: Vec<Regex>,
    /// Compiled top-level pattern set driving tokenization.
    patterns: PatternSet,
    /// Per-rule path into `patterns`; index is the rule id
    /// (see `pattern_by_rule`).
    rule_paths: Vec<Box<[usize]>>,
}
99
/// How a [`Grammar`] was built and therefore how it tokenizes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GrammarKind {
    /// Compiled from a TextMate grammar definition.
    TextMate,
    /// The built-in JSON grammar (scopes come from `json::SCOPES`).
    Json,
}
108
109impl Grammar {
110 pub fn compile(raw: &RawGrammar) -> Result<Self, Error> {
112 let mut next_rule = 0;
113 let mut interner = ScopeInterner::default();
114 let patterns = PatternSet::compile(raw, &raw.patterns, &mut next_rule, 0, &mut interner)?;
115 let first_line_match = first_line_patterns(raw.first_line_match.as_ref())
116 .iter()
117 .map(|pattern| Regex::new(pattern).map_err(|_| Error::InvalidRegex(pattern.clone())))
118 .collect::<Result<Vec<_>, _>>()?;
119 let rule_paths = patterns.collect_rule_paths(next_rule);
120
121 Ok(Self {
122 name: raw.display_name.clone().unwrap_or_else(|| raw.name.clone()),
123 scope_name: raw.scope_name.clone(),
124 file_types: raw
125 .file_types
126 .as_deref()
127 .unwrap_or_default()
128 .iter()
129 .chain(std::iter::once(&raw.name))
130 .map(|item| key(item))
131 .collect(),
132 first_line_match,
133 patterns,
134 rule_paths,
135 scopes: interner.scopes,
136 kind: GrammarKind::TextMate,
137 })
138 }
139
140 pub fn plain_text() -> Self {
142 Self {
143 name: PLAIN_TEXT_NAME.to_owned(),
144 scope_name: "text.plain".to_owned(),
145 file_types: ["txt".to_owned(), "text".to_owned()].into_iter().collect(),
146 first_line_match: Vec::new(),
147 patterns: PatternSet::empty(),
148 rule_paths: Vec::new(),
149 scopes: Vec::new(),
150 kind: GrammarKind::TextMate,
151 }
152 }
153
154 pub fn json() -> Self {
160 Self {
161 name: "JSON".to_owned(),
162 scope_name: "source.json".to_owned(),
163 file_types: ["json".to_owned()].into_iter().collect(),
164 first_line_match: Vec::new(),
165 patterns: PatternSet::empty(),
166 rule_paths: Vec::new(),
167 scopes: json::SCOPES
168 .iter()
169 .map(|scope| (*scope).to_owned())
170 .collect(),
171 kind: GrammarKind::Json,
172 }
173 }
174
175 pub fn matches_name(&self, name: &str) -> bool {
177 name.eq_ignore_ascii_case(&self.name)
178 || name.eq_ignore_ascii_case(&self.scope_name)
179 || self.file_types.contains(&key(name))
180 }
181
182 pub fn matches_path(&self, path: &Path) -> bool {
184 path_keys(path).any(|key| self.file_types.contains(&key))
185 }
186
187 pub fn matches_first_line(&self, line: &str) -> bool {
189 self.first_line_match
190 .iter()
191 .any(|regex| regex.find(line).is_some())
192 }
193
194 pub fn tokenize_line(&self, state: &mut LineState, line: &str) -> Vec<ScopeSpan> {
196 let mut spans = Vec::new();
197 self.tokenize_line_into(state, line, &mut spans);
198 spans
199 }
200
201 pub fn tokenize_line_into(
203 &self,
204 state: &mut LineState,
205 line: &str,
206 spans: &mut Vec<ScopeSpan>,
207 ) {
208 LineTokenizer::new(self).tokenize_line_into(state, line, spans);
209 }
210
211 fn pattern_by_rule(&self, rule_id: usize) -> Option<&pattern::Pattern> {
213 let path = self.rule_paths.get(rule_id)?;
214 self.patterns.pattern_at(path)
215 }
216}
217
218impl FromStr for Grammar {
219 type Err = Error;
220
221 fn from_str(input: &str) -> Result<Self, Self::Err> {
223 let raw = RawGrammar::from_str(input)?;
224 Self::compile(&raw)
225 }
226}