1use super::regex::{Regex, Region};
2use super::scope::*;
3use super::syntax_definition::*;
4use std::collections::HashMap;
5use std::error::Error;
6use std::ops::DerefMut;
7use std::path::Path;
8use yaml_rust::yaml::Hash;
9use yaml_rust::{ScanError, Yaml, YamlLoader};
10
11#[derive(Debug, thiserror::Error)]
12#[non_exhaustive]
13pub enum ParseSyntaxError {
14 #[error("Invalid YAML file syntax: {0}")]
16 InvalidYaml(#[from] ScanError),
17 #[error("The file must contain at least one YAML document")]
19 EmptyFile,
20 #[error("Missing mandatory key in YAML file: {0}")]
22 MissingMandatoryKey(&'static str),
23 #[error("Error while compiling regex '{0}': {1}")]
25 RegexCompileError(String, #[source] Box<dyn Error + Send + Sync + 'static>),
26 #[error("Invalid scope: {0}")]
28 InvalidScope(ParseScopeError),
29 #[error("Invalid file reference")]
31 BadFileRef,
32 #[error("Context 'main' is missing")]
34 MainMissing,
35 #[error("Type mismatch")]
39 TypeMismatch,
40}
41
42fn get_key<'a, R, F: FnOnce(&'a Yaml) -> Option<R>>(
43 map: &'a Hash,
44 key: &'static str,
45 f: F,
46) -> Result<R, ParseSyntaxError> {
47 map.get(&Yaml::String(key.to_owned()))
48 .ok_or(ParseSyntaxError::MissingMandatoryKey(key))
49 .and_then(|x| f(x).ok_or(ParseSyntaxError::TypeMismatch))
50}
51
52fn str_to_scopes(s: &str, repo: &mut ScopeRepository) -> Result<Vec<Scope>, ParseSyntaxError> {
53 s.split_whitespace()
54 .map(|scope| repo.build(scope).map_err(ParseSyntaxError::InvalidScope))
55 .collect()
56}
57
58struct ParserState<'a> {
59 scope_repo: &'a mut ScopeRepository,
60 variables: HashMap<String, String>,
61 variable_regex: Regex,
62 backref_regex: Regex,
63 lines_include_newline: bool,
64}
65
/// YAML for the two synthetic contexts added to every syntax by `add_initial_contexts`.
///
/// `__start` disables prototype inclusion and its only rule pushes `__main` on an empty
/// match; `__main` simply includes the user's `main` context. The nesting matters:
/// `push` must belong to the same list item as `match`.
static START_CONTEXT: &str = "
__start:
    - meta_include_prototype: false
    - match: ''
      push: __main
__main:
    - include: main
";
76
77impl SyntaxDefinition {
78 pub fn load_from_str(
86 s: &str,
87 lines_include_newline: bool,
88 fallback_name: Option<&str>,
89 ) -> Result<SyntaxDefinition, ParseSyntaxError> {
90 let docs = match YamlLoader::load_from_str(s) {
91 Ok(x) => x,
92 Err(e) => return Err(ParseSyntaxError::InvalidYaml(e)),
93 };
94 if docs.is_empty() {
95 return Err(ParseSyntaxError::EmptyFile);
96 }
97 let doc = &docs[0];
98 let mut scope_repo = SCOPE_REPO.lock().unwrap();
99 SyntaxDefinition::parse_top_level(
100 doc,
101 scope_repo.deref_mut(),
102 lines_include_newline,
103 fallback_name,
104 )
105 }
106
107 fn parse_top_level(
108 doc: &Yaml,
109 scope_repo: &mut ScopeRepository,
110 lines_include_newline: bool,
111 fallback_name: Option<&str>,
112 ) -> Result<SyntaxDefinition, ParseSyntaxError> {
113 let h = doc.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
114
115 let mut variables = HashMap::new();
116 if let Ok(map) = get_key(h, "variables", |x| x.as_hash()) {
117 for (key, value) in map.iter() {
118 if let (Some(key_str), Some(val_str)) = (key.as_str(), value.as_str()) {
119 variables.insert(key_str.to_owned(), val_str.to_owned());
120 }
121 }
122 }
123 let contexts_hash = get_key(h, "contexts", |x| x.as_hash())?;
124 let top_level_scope = scope_repo
125 .build(get_key(h, "scope", |x| x.as_str())?)
126 .map_err(ParseSyntaxError::InvalidScope)?;
127 let mut state = ParserState {
128 scope_repo,
129 variables,
130 variable_regex: Regex::new(r"\{\{([A-Za-z0-9_]+)\}\}".into()),
131 backref_regex: Regex::new(r"\\\d".into()),
132 lines_include_newline,
133 };
134
135 let mut contexts = SyntaxDefinition::parse_contexts(contexts_hash, &mut state)?;
136 if !contexts.contains_key("main") {
137 return Err(ParseSyntaxError::MainMissing);
138 }
139
140 SyntaxDefinition::add_initial_contexts(&mut contexts, &mut state, top_level_scope);
141
142 let mut file_extensions = Vec::new();
143 for extension_key in &["file_extensions", "hidden_file_extensions"] {
144 if let Ok(v) = get_key(h, extension_key, |x| x.as_vec()) {
145 file_extensions.extend(v.iter().filter_map(|y| y.as_str().map(|s| s.to_owned())))
146 }
147 }
148
149 let defn = SyntaxDefinition {
150 name: get_key(h, "name", |x| x.as_str())
151 .unwrap_or_else(|_| fallback_name.unwrap_or("Unnamed"))
152 .to_owned(),
153 scope: top_level_scope,
154 file_extensions,
155 first_line_match: get_key(h, "first_line_match", |x| x.as_str())
157 .ok()
158 .map(|s| s.to_owned()),
159 hidden: get_key(h, "hidden", |x| x.as_bool()).unwrap_or(false),
160
161 variables: state.variables,
162 contexts,
163 };
164 Ok(defn)
165 }
166
167 fn parse_contexts(
168 map: &Hash,
169 state: &mut ParserState<'_>,
170 ) -> Result<HashMap<String, Context>, ParseSyntaxError> {
171 let mut contexts = HashMap::new();
172 for (key, value) in map.iter() {
173 if let (Some(name), Some(val_vec)) = (key.as_str(), value.as_vec()) {
174 let is_prototype = name == "prototype";
175 let mut namer = ContextNamer::new(name);
176 SyntaxDefinition::parse_context(
177 val_vec,
178 state,
179 &mut contexts,
180 is_prototype,
181 &mut namer,
182 )?;
183 }
184 }
185
186 Ok(contexts)
187 }
188
189 fn parse_context(
190 vec: &[Yaml],
191 state: &mut ParserState<'_>,
193 contexts: &mut HashMap<String, Context>,
194 is_prototype: bool,
195 namer: &mut ContextNamer,
196 ) -> Result<String, ParseSyntaxError> {
197 let mut context = Context::new(!is_prototype);
198 let name = namer.next();
199
200 for y in vec.iter() {
201 let map = y.as_hash().ok_or(ParseSyntaxError::TypeMismatch)?;
202
203 let mut is_special = false;
204 if let Ok(x) = get_key(map, "meta_scope", |x| x.as_str()) {
205 context.meta_scope = str_to_scopes(x, state.scope_repo)?;
206 is_special = true;
207 }
208 if let Ok(x) = get_key(map, "meta_content_scope", |x| x.as_str()) {
209 context.meta_content_scope = str_to_scopes(x, state.scope_repo)?;
210 is_special = true;
211 }
212 if let Ok(x) = get_key(map, "meta_include_prototype", |x| x.as_bool()) {
213 context.meta_include_prototype = x;
214 is_special = true;
215 }
216 if let Ok(true) = get_key(map, "clear_scopes", |x| x.as_bool()) {
217 context.clear_scopes = Some(ClearAmount::All);
218 is_special = true;
219 }
220 if let Ok(x) = get_key(map, "clear_scopes", |x| x.as_i64()) {
221 context.clear_scopes = Some(ClearAmount::TopN(x as usize));
222 is_special = true;
223 }
224 if !is_special {
225 if let Ok(x) = get_key(map, "include", Some) {
226 let reference =
227 SyntaxDefinition::parse_reference(x, state, contexts, namer, false)?;
228 context.patterns.push(Pattern::Include(reference));
229 } else {
230 let pattern =
231 SyntaxDefinition::parse_match_pattern(map, state, contexts, namer)?;
232 if pattern.has_captures {
233 context.uses_backrefs = true;
234 }
235 context.patterns.push(Pattern::Match(pattern));
236 }
237 }
238 }
239
240 contexts.insert(name.clone(), context);
241 Ok(name)
242 }
243
244 fn parse_reference(
245 y: &Yaml,
246 state: &mut ParserState<'_>,
247 contexts: &mut HashMap<String, Context>,
248 namer: &mut ContextNamer,
249 with_escape: bool,
250 ) -> Result<ContextReference, ParseSyntaxError> {
251 if let Some(s) = y.as_str() {
252 let parts: Vec<&str> = s.split('#').collect();
253 let sub_context = if parts.len() > 1 {
254 Some(parts[1].to_owned())
255 } else {
256 None
257 };
258 if parts[0].starts_with("scope:") {
259 Ok(ContextReference::ByScope {
260 scope: state
261 .scope_repo
262 .build(&parts[0][6..])
263 .map_err(ParseSyntaxError::InvalidScope)?,
264 sub_context,
265 with_escape,
266 })
267 } else if parts[0].ends_with(".sublime-syntax") {
268 let stem = Path::new(parts[0])
269 .file_stem()
270 .and_then(|x| x.to_str())
271 .ok_or(ParseSyntaxError::BadFileRef)?;
272 Ok(ContextReference::File {
273 name: stem.to_owned(),
274 sub_context,
275 with_escape,
276 })
277 } else {
278 Ok(ContextReference::Named(parts[0].to_owned()))
279 }
280 } else if let Some(v) = y.as_vec() {
281 let subname = SyntaxDefinition::parse_context(v, state, contexts, false, namer)?;
282 Ok(ContextReference::Inline(subname))
283 } else {
284 Err(ParseSyntaxError::TypeMismatch)
285 }
286 }
287
288 fn parse_match_pattern(
289 map: &Hash,
290 state: &mut ParserState<'_>,
291 contexts: &mut HashMap<String, Context>,
292 namer: &mut ContextNamer,
293 ) -> Result<MatchPattern, ParseSyntaxError> {
294 let raw_regex = get_key(map, "match", |x| x.as_str())?;
295 let regex_str = Self::parse_regex(raw_regex, state)?;
296 let scope = get_key(map, "scope", |x| x.as_str())
299 .ok()
300 .map(|s| str_to_scopes(s, state.scope_repo))
301 .unwrap_or_else(|| Ok(vec![]))?;
302
303 let captures = if let Ok(map) = get_key(map, "captures", |x| x.as_hash()) {
304 Some(Self::parse_captures(map, ®ex_str, state)?)
305 } else {
306 None
307 };
308
309 let mut has_captures = false;
310 let operation = if get_key(map, "pop", Some).is_ok() {
311 has_captures =
313 state
314 .backref_regex
315 .search(®ex_str, 0, regex_str.len(), None, false)?;
316 MatchOperation::Pop
317 } else if let Ok(y) = get_key(map, "push", Some) {
318 MatchOperation::Push(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
319 } else if let Ok(y) = get_key(map, "set", Some) {
320 MatchOperation::Set(SyntaxDefinition::parse_pushargs(y, state, contexts, namer)?)
321 } else if let Ok(y) = get_key(map, "embed", Some) {
322 let mut embed_escape_context_yaml = vec![];
324 let mut commands = Hash::new();
325 commands.insert(
326 Yaml::String("meta_include_prototype".to_string()),
327 Yaml::Boolean(false),
328 );
329 embed_escape_context_yaml.push(Yaml::Hash(commands));
330 if let Ok(s) = get_key(map, "embed_scope", Some) {
331 commands = Hash::new();
332 commands.insert(Yaml::String("meta_content_scope".to_string()), s.clone());
333 embed_escape_context_yaml.push(Yaml::Hash(commands));
334 }
335 if let Ok(v) = get_key(map, "escape", Some) {
336 let mut match_map = Hash::new();
337 match_map.insert(Yaml::String("match".to_string()), v.clone());
338 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
339 if let Ok(y) = get_key(map, "escape_captures", Some) {
340 match_map.insert(Yaml::String("captures".to_string()), y.clone());
341 }
342 embed_escape_context_yaml.push(Yaml::Hash(match_map));
343 let escape_context = SyntaxDefinition::parse_context(
344 &embed_escape_context_yaml,
345 state,
346 contexts,
347 false,
348 namer,
349 )?;
350 MatchOperation::Push(vec![
351 ContextReference::Inline(escape_context),
352 SyntaxDefinition::parse_reference(y, state, contexts, namer, true)?,
353 ])
354 } else {
355 return Err(ParseSyntaxError::MissingMandatoryKey("escape"));
356 }
357 } else {
358 MatchOperation::None
359 };
360
361 let with_prototype = if let Ok(v) = get_key(map, "with_prototype", |x| x.as_vec()) {
362 let subname = Self::parse_context(v, state, contexts, true, namer)?;
364 Some(ContextReference::Inline(subname))
365 } else if let Ok(v) = get_key(map, "escape", Some) {
366 let subname = namer.next();
367
368 let mut context = Context::new(false);
369 let mut match_map = Hash::new();
370 match_map.insert(
371 Yaml::String("match".to_string()),
372 Yaml::String(format!("(?={})", v.as_str().unwrap())),
373 );
374 match_map.insert(Yaml::String("pop".to_string()), Yaml::Boolean(true));
375 let pattern =
376 SyntaxDefinition::parse_match_pattern(&match_map, state, contexts, namer)?;
377 if pattern.has_captures {
378 context.uses_backrefs = true;
379 }
380 context.patterns.push(Pattern::Match(pattern));
381
382 contexts.insert(subname.clone(), context);
383 Some(ContextReference::Inline(subname))
384 } else {
385 None
386 };
387
388 let pattern = MatchPattern::new(
389 has_captures,
390 regex_str,
391 scope,
392 captures,
393 operation,
394 with_prototype,
395 );
396
397 Ok(pattern)
398 }
399
400 fn parse_pushargs(
401 y: &Yaml,
402 state: &mut ParserState<'_>,
403 contexts: &mut HashMap<String, Context>,
404 namer: &mut ContextNamer,
405 ) -> Result<Vec<ContextReference>, ParseSyntaxError> {
406 if y.as_vec().map_or(false, |v| {
408 !v.is_empty()
409 && (v[0].as_str().is_some()
410 || (v[0].as_vec().is_some() && v[0].as_vec().unwrap()[0].as_hash().is_some()))
411 }) {
412 y.as_vec()
414 .unwrap()
415 .iter()
416 .map(|x| SyntaxDefinition::parse_reference(x, state, contexts, namer, false))
417 .collect()
418 } else {
419 let reference = SyntaxDefinition::parse_reference(y, state, contexts, namer, false)?;
420 Ok(vec![reference])
421 }
422 }
423
424 fn parse_regex(raw_regex: &str, state: &ParserState<'_>) -> Result<String, ParseSyntaxError> {
425 let regex = Self::resolve_variables(raw_regex, state)?;
426 let regex = replace_posix_char_classes(regex);
427 let regex = if state.lines_include_newline {
428 regex_for_newlines(regex)
429 } else {
430 regex_for_no_newlines(regex)
434 };
435 Self::try_compile_regex(®ex)?;
436 Ok(regex)
437 }
438
439 fn resolve_variables(
440 raw_regex: &str,
441 state: &ParserState<'_>,
442 ) -> Result<String, ParseSyntaxError> {
443 let mut result = String::new();
444 let mut index = 0;
445 let mut region = Region::new();
446 while state.variable_regex.search(
447 raw_regex,
448 index,
449 raw_regex.len(),
450 Some(&mut region),
451 false,
452 )? {
453 let (begin, end) = region.pos(0).unwrap();
454
455 result.push_str(&raw_regex[index..begin]);
456
457 let var_pos = region.pos(1).unwrap();
458 let var_name = &raw_regex[var_pos.0..var_pos.1];
459 let var_raw = state
460 .variables
461 .get(var_name)
462 .map(String::as_ref)
463 .unwrap_or("");
464 let var_resolved = Self::resolve_variables(var_raw, state)?;
465 result.push_str(&var_resolved);
466
467 index = end;
468 }
469 if index < raw_regex.len() {
470 result.push_str(&raw_regex[index..]);
471 }
472 Ok(result)
473 }
474
475 fn try_compile_regex(regex_str: &str) -> Result<(), ParseSyntaxError> {
476 let regex_str =
478 substitute_backrefs_in_regex(regex_str, |i| Some(format!("<placeholder_{}>", i)));
479
480 if let Some(error) = Regex::try_compile(®ex_str) {
481 Err(ParseSyntaxError::RegexCompileError(regex_str, error))
482 } else {
483 Ok(())
484 }
485 }
486
487 fn parse_captures(
488 map: &Hash,
489 regex_str: &str,
490 state: &mut ParserState<'_>,
491 ) -> Result<CaptureMapping, ParseSyntaxError> {
492 let valid_indexes = get_consuming_capture_indexes(regex_str);
493 let mut captures = Vec::new();
494 for (key, value) in map.iter() {
495 if let (Some(key_int), Some(val_str)) = (key.as_i64(), value.as_str()) {
496 if valid_indexes.contains(&(key_int as usize)) {
497 captures.push((key_int as usize, str_to_scopes(val_str, state.scope_repo)?));
498 }
499 }
500 }
501 Ok(captures)
502 }
503
504 fn add_initial_contexts(
511 contexts: &mut HashMap<String, Context>,
512 state: &mut ParserState<'_>,
513 top_level_scope: Scope,
514 ) {
515 let yaml_docs = YamlLoader::load_from_str(START_CONTEXT).unwrap();
516 let yaml = &yaml_docs[0];
517
518 let start_yaml: &[Yaml] = yaml["__start"].as_vec().unwrap();
519 SyntaxDefinition::parse_context(
520 start_yaml,
521 state,
522 contexts,
523 false,
524 &mut ContextNamer::new("__start"),
525 )
526 .unwrap();
527 if let Some(start) = contexts.get_mut("__start") {
528 start.meta_content_scope = vec![top_level_scope];
529 }
530
531 let main_yaml: &[Yaml] = yaml["__main"].as_vec().unwrap();
532 SyntaxDefinition::parse_context(
533 main_yaml,
534 state,
535 contexts,
536 false,
537 &mut ContextNamer::new("__main"),
538 )
539 .unwrap();
540
541 let meta_include_prototype = contexts["main"].meta_include_prototype;
542 let meta_scope = contexts["main"].meta_scope.clone();
543 let meta_content_scope = contexts["main"].meta_content_scope.clone();
544
545 if let Some(outer_main) = contexts.get_mut("__main") {
546 outer_main.meta_include_prototype = meta_include_prototype;
547 outer_main.meta_scope = meta_scope;
548 outer_main.meta_content_scope = meta_content_scope;
549 }
550
551 if let Some(main) = contexts.get_mut("main") {
555 main.meta_content_scope.insert(0, top_level_scope);
556 }
557 }
558}
559
/// Hands out context names: first the given name itself, then `#anon_<name>_<n>` for
/// every anonymous context nested inside it, with `n` counting up from 0.
struct ContextNamer {
    name: String,
    /// `None` until the named context itself has been handed out; afterwards the index
    /// the next anonymous context will receive.
    anonymous_index: Option<usize>,
}

impl ContextNamer {
    /// Creates a namer whose first `next()` call returns `name` unchanged.
    fn new(name: &str) -> ContextNamer {
        ContextNamer {
            name: name.to_owned(),
            anonymous_index: None,
        }
    }

    /// Returns the next name in the sequence and advances the counter.
    fn next(&mut self) -> String {
        match self.anonymous_index {
            None => {
                self.anonymous_index = Some(0);
                self.name.clone()
            }
            Some(index) => {
                self.anonymous_index = Some(index + 1);
                format!("#anon_{}_{}", self.name, index)
            }
        }
    }
}
584
/// Rewrites the POSIX classes `[:alpha:]`, `[:alnum:]`, `[:lower:]`, `[:upper:]` and
/// `[:digit:]` into Unicode property escapes, by plain textual substitution.
fn replace_posix_char_classes(regex: String) -> String {
    const SUBSTITUTIONS: [(&str, &str); 5] = [
        ("[:alpha:]", r"\p{L}"),
        ("[:alnum:]", r"\p{L}\p{N}"),
        ("[:lower:]", r"\p{Ll}"),
        ("[:upper:]", r"\p{Lu}"),
        ("[:digit:]", r"\p{Nd}"),
    ];

    SUBSTITUTIONS
        .iter()
        .fold(regex, |text, &(posix, unicode)| text.replace(posix, unicode))
}
597
598fn regex_for_newlines(regex: String) -> String {
609 if !regex.contains('$') {
610 return regex;
611 }
612
613 let rewriter = RegexRewriterForNewlines {
614 parser: Parser::new(regex.as_bytes()),
615 };
616 rewriter.rewrite()
617}
618
619struct RegexRewriterForNewlines<'a> {
620 parser: Parser<'a>,
621}
622
623impl<'a> RegexRewriterForNewlines<'a> {
624 fn rewrite(mut self) -> String {
625 let mut result = Vec::new();
626
627 while let Some(c) = self.parser.peek() {
628 match c {
629 b'$' => {
630 self.parser.next();
631 result.extend_from_slice(br"(?m:$)");
632 }
633 b'\\' => {
634 self.parser.next();
635 result.push(c);
636 if let Some(c2) = self.parser.peek() {
637 self.parser.next();
638 result.push(c2);
639 }
640 }
641 b'[' => {
642 let (mut content, _) = self.parser.parse_character_class();
643 result.append(&mut content);
644 }
645 _ => {
646 self.parser.next();
647 result.push(c);
648 }
649 }
650 }
651 String::from_utf8(result).unwrap()
652 }
653}
654
655fn regex_for_no_newlines(regex: String) -> String {
664 if !regex.contains(r"\n") {
665 return regex;
666 }
667
668 let regex = regex.replace("(?:\\n)?", "(?:$|)");
671
672 let rewriter = RegexRewriterForNoNewlines {
673 parser: Parser::new(regex.as_bytes()),
674 };
675 rewriter.rewrite()
676}
677
678struct RegexRewriterForNoNewlines<'a> {
679 parser: Parser<'a>,
680}
681
682impl<'a> RegexRewriterForNoNewlines<'a> {
683 fn rewrite(mut self) -> String {
684 let mut result = Vec::new();
685 while let Some(c) = self.parser.peek() {
686 match c {
687 b'\\' => {
688 self.parser.next();
689 if let Some(c2) = self.parser.peek() {
690 self.parser.next();
691 let c3 = self.parser.peek();
694 if c2 == b'n' && c3 != Some(b'?') && c3 != Some(b'+') && c3 != Some(b'*') {
695 result.extend_from_slice(b"$");
696 } else {
697 result.push(c);
698 result.push(c2);
699 }
700 } else {
701 result.push(c);
702 }
703 }
704 b'[' => {
705 let (mut content, matches_newline) = self.parser.parse_character_class();
706 if matches_newline && self.parser.peek() != Some(b'?') {
707 result.extend_from_slice(b"(?:");
708 result.append(&mut content);
709 result.extend_from_slice(br"|$)");
710 } else {
711 result.append(&mut content);
712 }
713 }
714 _ => {
715 self.parser.next();
716 result.push(c);
717 }
718 }
719 }
720 String::from_utf8(result).unwrap()
721 }
722}
723
724fn get_consuming_capture_indexes(regex: &str) -> Vec<usize> {
725 let parser = ConsumingCaptureIndexParser {
726 parser: Parser::new(regex.as_bytes()),
727 };
728 parser.get_consuming_capture_indexes()
729}
730
731struct ConsumingCaptureIndexParser<'a> {
732 parser: Parser<'a>,
733}
734
735impl<'a> ConsumingCaptureIndexParser<'a> {
736 fn get_consuming_capture_indexes(mut self) -> Vec<usize> {
743 let mut result = Vec::new();
744 let mut stack = Vec::new();
745 let mut cap_num = 0;
746 let mut in_lookaround = false;
747 stack.push(in_lookaround);
748 result.push(cap_num);
749
750 while let Some(c) = self.parser.peek() {
751 match c {
752 b'\\' => {
753 self.parser.next();
754 self.parser.next();
755 }
756 b'[' => {
757 self.parser.parse_character_class();
758 }
759 b'(' => {
760 self.parser.next();
761 stack.push(in_lookaround);
763 if let Some(c2) = self.parser.peek() {
764 if c2 != b'?' {
765 cap_num += 1;
767 if !in_lookaround {
770 result.push(cap_num);
771 }
772 } else {
773 self.parser.next();
774 if let Some(c3) = self.parser.peek() {
775 self.parser.next();
776 if c3 == b'=' || c3 == b'!' {
777 in_lookaround = true;
779 } else if c3 == b'<' {
780 if let Some(c4) = self.parser.peek() {
781 if c4 == b'=' || c4 == b'!' {
782 self.parser.next();
783 in_lookaround = true;
785 }
786 }
787 } else if c3 == b'P' {
788 if let Some(c4) = self.parser.peek() {
789 if c4 == b'<' {
790 cap_num += 1;
792 if !in_lookaround {
795 result.push(cap_num);
796 }
797 }
798 }
799 }
800 }
801 }
802 }
803 }
804 b')' => {
805 if let Some(value) = stack.pop() {
806 in_lookaround = value;
807 }
808 self.parser.next();
809 }
810 _ => {
811 self.parser.next();
812 }
813 }
814 }
815 result
816 }
817}
818
/// Minimal byte cursor over a regex, shared by the rewriters and the capture scanner.
struct Parser<'a> {
    bytes: &'a [u8],
    index: usize,
}

impl<'a> Parser<'a> {
    /// Creates a cursor positioned at the first byte.
    fn new(bytes: &[u8]) -> Parser<'_> {
        Parser { bytes, index: 0 }
    }

    /// Returns the byte under the cursor, or `None` past the end.
    fn peek(&self) -> Option<u8> {
        self.bytes.get(self.index).copied()
    }

    /// Advances the cursor by one byte (without bounds checking).
    fn next(&mut self) {
        self.index += 1;
    }

    /// Consumes a character class starting at the `[` under the cursor.
    ///
    /// Returns the bytes of the class (brackets included) and whether the class
    /// contains a `\n` escape at the top nesting level of a non-negated class.
    /// An unterminated class consumes the rest of the input.
    fn parse_character_class(&mut self) -> (Vec<u8>, bool) {
        let mut content = vec![b'['];
        let mut depth = 0usize;
        let mut matches_newline = false;

        // Step over the opening bracket recorded above.
        self.next();

        let negated = self.peek() == Some(b'^');
        if negated {
            self.next();
            content.push(b'^');
        }

        // A `]` directly after `[` or `[^` is a literal, not the end of the class.
        if self.peek() == Some(b']') {
            self.next();
            content.push(b']');
        }

        while let Some(byte) = self.peek() {
            self.next();
            content.push(byte);
            match byte {
                b'\\' => {
                    if let Some(escaped) = self.peek() {
                        self.next();
                        content.push(escaped);
                        if escaped == b'n' && !negated && depth == 0 {
                            matches_newline = true;
                        }
                    }
                }
                b'[' => depth += 1,
                b']' => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }
                _ => {}
            }
        }

        (content, matches_newline)
    }
}
893
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::Scope;

    /// Loads a minimal and a fuller definition and checks the parsed fields.
    #[test]
    fn can_parse() {
        let defn: SyntaxDefinition = SyntaxDefinition::load_from_str(
            "name: C\nscope: source.c\ncontexts: {main: []}",
            false,
            None,
        )
        .unwrap();
        assert_eq!(defn.name, "C");
        assert_eq!(defn.scope, Scope::new("source.c").unwrap());
        let exts_empty: Vec<String> = Vec::new();
        assert_eq!(defn.file_extensions, exts_empty);
        assert!(!defn.hidden);
        assert!(defn.variables.is_empty());
        let defn2: SyntaxDefinition = SyntaxDefinition::load_from_str(
            "
        name: C
        scope: source.c
        file_extensions: [c, h]
        hidden_file_extensions: [k, l]
        hidden: true
        variables:
          ident: '[QY]+'
        contexts:
          prototype:
            - match: lol
              scope: source.php
          main:
            - match: \\b(if|else|for|while|{{ident}})\\b
              scope: keyword.control.c keyword.looping.c
              captures:
                  1: meta.preprocessor.c++
                  2: keyword.control.include.c++
              push: [string, 'scope:source.c#main', 'CSS.sublime-syntax#rule-list-body']
              with_prototype:
                - match: wow
                  pop: true
            - match: '\"'
              push: string
          string:
            - meta_scope: string.quoted.double.c
            - meta_include_prototype: false
            - match: \\\\.
              scope: constant.character.escape.c
            - match: '\"'
              pop: true
        ",
            false,
            None,
        )
        .unwrap();
        assert_eq!(defn2.name, "C");
        let top_level_scope = Scope::new("source.c").unwrap();
        assert_eq!(defn2.scope, top_level_scope);
        let exts: Vec<String> = vec!["c", "h", "k", "l"]
            .into_iter()
            .map(String::from)
            .collect();
        assert_eq!(defn2.file_extensions, exts);
        assert!(defn2.hidden);
        assert_eq!(defn2.variables.get("ident").unwrap(), "[QY]+");

        let n: Vec<Scope> = Vec::new();
        let main = &defn2.contexts["main"];
        // `main` gets the top-level scope prepended to its meta_content_scope.
        assert_eq!(main.meta_content_scope, vec![top_level_scope]);
        assert_eq!(main.meta_scope, n);
        assert!(main.meta_include_prototype);

        assert_eq!(defn2.contexts["__main"].meta_content_scope, n);
        assert_eq!(
            defn2.contexts["__start"].meta_content_scope,
            vec![top_level_scope]
        );

        assert_eq!(
            defn2.contexts["string"].meta_scope,
            vec![Scope::new("string.quoted.double.c").unwrap()]
        );
        let first_pattern: &Pattern = &main.patterns[0];
        match *first_pattern {
            Pattern::Match(ref match_pat) => {
                let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
                assert_eq!(
                    &m[0],
                    &(1, vec![Scope::new("meta.preprocessor.c++").unwrap()])
                );
                use crate::parsing::syntax_definition::ContextReference::*;

                // Compared through their Debug output.
                let expected = MatchOperation::Push(vec![
                    Named("string".to_owned()),
                    ByScope {
                        scope: Scope::new("source.c").unwrap(),
                        sub_context: Some("main".to_owned()),
                        with_escape: false,
                    },
                    File {
                        name: "CSS".to_owned(),
                        sub_context: Some("rule-list-body".to_owned()),
                        with_escape: false,
                    },
                ]);
                assert_eq!(
                    format!("{:?}", match_pat.operation),
                    format!("{:?}", expected)
                );

                assert_eq!(
                    match_pat.scope,
                    vec![
                        Scope::new("keyword.control.c").unwrap(),
                        Scope::new("keyword.looping.c").unwrap()
                    ]
                );

                assert!(match_pat.with_prototype.is_some());
            }
            _ => unreachable!(),
        }
    }

    /// `embed` + `escape` must produce the same `main` context as the equivalent
    /// hand-written `push` + `with_prototype`.
    #[test]
    fn can_parse_embed_as_with_prototypes() {
        let old_def = SyntaxDefinition::load_from_str(
            r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              push:
                - [{ meta_include_prototype: false }, { meta_content_scope: 'source.css.embedded.html' }, { match: '(?i)(?=</style)', pop: true }]
                - scope:source.css
              with_prototype:
                - match: (?=(?i)(?=</style))
                  pop: true
        "#,
            false,
            None,
        )
        .unwrap();

        let mut def_with_embed = SyntaxDefinition::load_from_str(
            r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
              escape: (?i)(?=</style)
        "#,
            false,
            None,
        )
        .unwrap();

        // The embedded reference is parsed with `with_escape` set to true, while a
        // plain `push` uses false. Reset it so the two contexts can be compared.
        let def_with_embed_context = def_with_embed.contexts.get_mut("main").unwrap();
        if let Pattern::Match(ref mut match_pattern) = def_with_embed_context.patterns[0] {
            if let MatchOperation::Push(ref mut context_references) = match_pattern.operation {
                if let ContextReference::ByScope {
                    ref mut with_escape,
                    ..
                } = context_references[1]
                {
                    *with_escape = false;
                }
            }
        }

        assert_eq!(old_def.contexts["main"], def_with_embed.contexts["main"]);
    }

    #[test]
    fn errors_on_embed_without_escape() {
        let def = SyntaxDefinition::load_from_str(
            r#"
        name: C
        scope: source.c
        file_extensions: [c, h]
        variables:
          ident: '[QY]+'
        contexts:
          main:
            - match: '(>)\s*'
              captures:
                1: meta.tag.style.begin.html punctuation.definition.tag.end.html
              embed: scope:source.css
              embed_scope: source.css.embedded.html
        "#,
            false,
            None,
        );
        assert!(def.is_err());
        match def.unwrap_err() {
            ParseSyntaxError::MissingMandatoryKey(key) => assert_eq!(key, "escape"),
            _ => unreachable!("Got unexpected ParseSyntaxError"),
        }
    }

    #[test]
    fn errors_on_regex_compile_error() {
        let def = SyntaxDefinition::load_from_str(
            r#"
        name: C
        scope: source.c
        file_extensions: [test]
        contexts:
          main:
            - match: '[a'
              scope: keyword.name
        "#,
            false,
            None,
        );
        assert!(def.is_err());
        match def.unwrap_err() {
            ParseSyntaxError::RegexCompileError(ref regex, _) => assert_eq!("[a", regex),
            _ => unreachable!("Got unexpected ParseSyntaxError"),
        }
    }

    /// A `push` list mixing an inline context (list of mappings) with named contexts.
    #[test]
    fn can_parse_ugly_yaml() {
        let defn: SyntaxDefinition = SyntaxDefinition::load_from_str(
            "
        name: LaTeX
        scope: text.tex.latex
        contexts:
          main:
            - match: '((\\\\)(?:framebox|makebox))\\b'
              captures:
                1: support.function.box.latex
                2: punctuation.definition.backslash.latex
              push:
                - [{meta_scope: meta.function.box.latex}, {match: '', pop: true}]
                - argument
                - optional-arguments
          argument:
            - match: '\\{'
              scope: punctuation.definition.group.brace.begin.latex
            - match: '(?=\\S)'
              pop: true
          optional-arguments:
            - match: '(?=\\S)'
              pop: true
        ",
            false,
            None,
        )
        .unwrap();
        assert_eq!(defn.name, "LaTeX");
        let top_level_scope = Scope::new("text.tex.latex").unwrap();
        assert_eq!(defn.scope, top_level_scope);

        let first_pattern: &Pattern = &defn.contexts["main"].patterns[0];
        match *first_pattern {
            Pattern::Match(ref match_pat) => {
                let m: &CaptureMapping = match_pat.captures.as_ref().expect("test failed");
                assert_eq!(
                    &m[0],
                    &(1, vec![Scope::new("support.function.box.latex").unwrap()])
                );

                assert!(match_pat.with_prototype.is_none());
            }
            _ => unreachable!(),
        }
    }

    /// Anonymous contexts nested in `a` are named `#anon_a_<n>` in parse order.
    #[test]
    fn names_anonymous_contexts() {
        let def = SyntaxDefinition::load_from_str(
            r#"
        scope: source.c
        contexts:
          main:
            - match: a
              push: a
          a:
            - meta_scope: a
            - match: x
              push:
                - meta_scope: anonymous_x
                - match: anything
                  push:
                    - meta_scope: anonymous_x_2
            - match: y
              push:
                - meta_scope: anonymous_y
            - match: z
              escape: 'test'
        "#,
            false,
            None,
        )
        .unwrap();

        assert_eq!(def.contexts["a"].meta_scope, vec![Scope::new("a").unwrap()]);
        assert_eq!(
            def.contexts["#anon_a_0"].meta_scope,
            vec![Scope::new("anonymous_x").unwrap()]
        );
        assert_eq!(
            def.contexts["#anon_a_1"].meta_scope,
            vec![Scope::new("anonymous_x_2").unwrap()]
        );
        assert_eq!(
            def.contexts["#anon_a_2"].meta_scope,
            vec![Scope::new("anonymous_y").unwrap()]
        );
        // The context synthesized for `escape` holds exactly one pattern.
        assert_eq!(def.contexts["#anon_a_3"].patterns.len(), 1);
    }

    #[test]
    fn can_use_fallback_name() {
        let def = SyntaxDefinition::load_from_str(
            r#"
        scope: source.c
        contexts:
          main:
            - match: ''
        "#,
            false,
            Some("C"),
        );
        assert_eq!(def.unwrap().name, "C");
    }

    #[test]
    fn can_rewrite_regex_for_newlines() {
        fn rewrite(s: &str) -> String {
            regex_for_newlines(s.to_string())
        }

        assert_eq!(&rewrite(r"a"), r"a");
        assert_eq!(&rewrite(r"\b"), r"\b");
        assert_eq!(&rewrite(r"(a)"), r"(a)");
        assert_eq!(&rewrite(r"[a]"), r"[a]");
        assert_eq!(&rewrite(r"[^a]"), r"[^a]");
        assert_eq!(&rewrite(r"[]a]"), r"[]a]");
        assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");

        assert_eq!(&rewrite(r"^"), r"^");
        assert_eq!(&rewrite(r"$"), r"(?m:$)");
        assert_eq!(&rewrite(r"^ab$"), r"^ab(?m:$)");
        assert_eq!(&rewrite(r"\^ab\$"), r"\^ab\$");
        assert_eq!(&rewrite(r"(//).*$"), r"(//).*(?m:$)");

        // `$` inside a character class is a literal and must not be rewritten.
        assert_eq!(&rewrite(r"[a$]"), r"[a$]");
    }

    #[test]
    fn can_rewrite_regex_for_no_newlines() {
        fn rewrite(s: &str) -> String {
            regex_for_no_newlines(s.to_string())
        }

        assert_eq!(&rewrite(r"a"), r"a");
        assert_eq!(&rewrite(r"\b"), r"\b");
        assert_eq!(&rewrite(r"(a)"), r"(a)");
        assert_eq!(&rewrite(r"[a]"), r"[a]");
        assert_eq!(&rewrite(r"[^a]"), r"[^a]");
        assert_eq!(&rewrite(r"[]a]"), r"[]a]");
        assert_eq!(&rewrite(r"[[a]]"), r"[[a]]");

        assert_eq!(&rewrite(r"\n"), r"$");
        assert_eq!(&rewrite(r"\[\n"), r"\[$");
        assert_eq!(&rewrite(r"a\n?"), r"a\n?");
        assert_eq!(&rewrite(r"a\n+"), r"a\n+");
        assert_eq!(&rewrite(r"a\n*"), r"a\n*");
        assert_eq!(&rewrite(r"[abc\n]"), r"(?:[abc\n]|$)");
        assert_eq!(&rewrite(r"[^\n]"), r"[^\n]");
        assert_eq!(&rewrite(r"[^]\n]"), r"[^]\n]");
        assert_eq!(&rewrite(r"[\n]?"), r"[\n]?");
        assert_eq!(&rewrite(r"[\n]"), r"(?:[\n]|$)");
        assert_eq!(&rewrite(r"[]\n]"), r"(?:[]\n]|$)");
        // A `\n` in a nested class is left alone.
        assert_eq!(&rewrite(r"[[a]&&[\n]]"), r"[[a]&&[\n]]");

        assert_eq!(&rewrite(r"ab(?:\n)?"), r"ab(?:$|)");
        assert_eq!(&rewrite(r"(?<!\n)ab"), r"(?<!$)ab");
        assert_eq!(&rewrite(r"(?<=\n)ab"), r"(?<=$)ab");
    }

    #[test]
    fn can_get_valid_captures_from_regex() {
        let regex = "hello(test)(?=(world))(foo(?P<named>bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 3, 4]);
    }

    #[test]
    fn can_get_valid_captures_from_regex2() {
        let regex = "hello(test)[(?=tricked](foo(bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 2, 3]);
    }

    #[test]
    fn can_get_valid_captures_from_nested_regex() {
        let regex = "hello(test)(?=(world(?!(te(?<=(st))))))(foo(bar))";
        let valid_indexes = get_consuming_capture_indexes(regex);
        assert_eq!(valid_indexes, [0, 1, 5, 6]);
    }
}