// syntect_no_panic/parsing/scope.rs
use std::cmp::{min, Ordering};
3use std::collections::HashMap;
4use std::fmt;
5use std::mem;
6use std::str::FromStr;
7use std::sync::Mutex;
8use std::u16;
9use std::u64;
10
11use once_cell::sync::Lazy;
12use serde::de::{Deserialize, Deserializer, Error, Visitor};
13use serde::ser::{Serialize, Serializer};
14use serde_derive::{Deserialize, Serialize};
15
16#[derive(Debug, thiserror::Error)]
18#[non_exhaustive]
19pub enum ScopeError {
20 #[error("Tried to restore cleared scopes, but none were cleared")]
21 NoClearedScopesToRestore,
22}
23
/// Number of bits by which each additional level of stack depth scales a
/// matched scope's contribution to a [`MatchPower`] score (see
/// [`ScopeStack::does_match`]).
pub const ATOM_LEN_BITS: u16 = 3;
29
30pub static SCOPE_REPO: Lazy<Mutex<ScopeRepository>> =
36 Lazy::new(|| Mutex::new(ScopeRepository::new()));
37
/// A dot-separated scope name packed into two 64-bit words.
///
/// Up to eight atoms are stored as 16-bit numbers, first atom in the most
/// significant bits: atoms 0–3 live in `a`, atoms 4–7 in `b`. A slot value of
/// `0` means "no atom"; any other value is the atom's repository index plus
/// one.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Scope {
    /// Atoms 0–3, atom 0 in the top 16 bits.
    a: u64,
    /// Atoms 4–7, atom 4 in the top 16 bits.
    b: u64,
}
58
59#[derive(Debug, thiserror::Error)]
61#[non_exhaustive]
62pub enum ParseScopeError {
63 #[error("Too long scope. Scopes can be at most 8 atoms long.")]
66 TooLong,
67 #[error("Too many atoms. Max 2^16-2 atoms allowed.")]
70 TooManyAtoms,
71}
72
/// Interning table that maps scope atoms (the parts between dots) to the
/// numeric indices stored inside a [`Scope`], and back again.
#[derive(Debug)]
pub struct ScopeRepository {
    /// Atom strings, addressed by their index.
    atoms: Vec<String>,
    /// Reverse lookup from an atom string to its position in `atoms`.
    atom_index_map: HashMap<String, usize>,
}
89
90#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
102pub struct ScopeStack {
103 clear_stack: Vec<Vec<Scope>>,
104 pub scopes: Vec<Scope>,
105}
106
107#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)]
108pub enum ClearAmount {
109 TopN(usize),
110 All,
111}
112
113#[derive(Debug, Clone, PartialEq, Eq)]
122pub enum ScopeStackOp {
123 Push(Scope),
124 Pop(usize),
125 Clear(ClearAmount),
127 Restore,
129 Noop,
130}
131
132#[derive(Debug, Clone, PartialEq, Eq)]
136pub enum BasicScopeStackOp {
137 Push(Scope),
138 Pop,
139}
140
141fn pack_as_u16s(atoms: &[usize]) -> Result<Scope, ParseScopeError> {
142 let mut res = Scope { a: 0, b: 0 };
143
144 for (i, &n) in atoms.iter().enumerate() {
145 if n >= (u16::MAX as usize) - 2 {
146 return Err(ParseScopeError::TooManyAtoms);
147 }
148 let small = (n + 1) as u64; if i < 4 {
151 let shift = (3 - i) * 16;
152 res.a |= small << shift;
153 } else {
154 let shift = (7 - i) * 16;
155 res.b |= small << shift;
156 }
157 }
158 Ok(res)
159}
160
161impl ScopeRepository {
162 fn new() -> ScopeRepository {
163 ScopeRepository {
164 atoms: Vec::new(),
165 atom_index_map: HashMap::new(),
166 }
167 }
168
169 pub fn build(&mut self, s: &str) -> Result<Scope, ParseScopeError> {
170 if s.is_empty() {
171 return Ok(Scope { a: 0, b: 0 });
172 }
173 let parts: Vec<usize> = s
174 .trim_end_matches('.')
175 .split('.')
176 .map(|a| self.atom_to_index(a))
177 .collect();
178 if parts.len() > 8 {
179 return Err(ParseScopeError::TooManyAtoms);
180 }
181 pack_as_u16s(&parts[..])
182 }
183
184 pub fn to_string(&self, scope: Scope) -> String {
185 let mut s = String::new();
186 for i in 0..8 {
187 let atom_number = scope.atom_at(i);
188 if atom_number == 0 {
191 break;
192 }
193 if i != 0 {
194 s.push('.');
195 }
196 s.push_str(self.atom_str(atom_number));
197 }
198 s
199 }
200
201 fn atom_to_index(&mut self, atom: &str) -> usize {
202 if let Some(index) = self.atom_index_map.get(atom) {
203 return *index;
204 }
205
206 self.atoms.push(atom.to_owned());
207 let index = self.atoms.len() - 1;
208 self.atom_index_map.insert(atom.to_owned(), index);
209
210 index
211 }
212
213 pub fn atom_str(&self, atom_number: u16) -> &str {
217 &self.atoms[(atom_number - 1) as usize]
218 }
219}
220
221impl Scope {
222 pub fn new(s: &str) -> Result<Scope, ParseScopeError> {
226 let mut repo = SCOPE_REPO.lock().unwrap();
227 repo.build(s.trim())
228 }
229
230 pub fn atom_at(self, index: usize) -> u16 {
235 #[allow(clippy::panic)]
236 let shifted = if index < 4 {
238 self.a >> ((3 - index) * 16)
239 } else if index < 8 {
240 self.b >> ((7 - index) * 16)
241 } else {
242 panic!("atom index out of bounds {:?}", index);
243 };
244 (shifted & 0xFFFF) as u16
245 }
246
247 #[inline]
248 fn missing_atoms(self) -> u32 {
249 let trail = if self.b == 0 {
250 self.a.trailing_zeros() + 64
251 } else {
252 self.b.trailing_zeros()
253 };
254 trail / 16
255 }
256
257 #[inline(always)]
259 pub fn len(self) -> u32 {
260 8 - self.missing_atoms()
261 }
262
263 pub fn is_empty(self) -> bool {
264 self.len() == 0
265 }
266
267 pub fn build_string(self) -> String {
271 let repo = SCOPE_REPO.lock().unwrap();
272 repo.to_string(self)
273 }
274
275 pub fn is_prefix_of(self, s: Scope) -> bool {
299 let pref_missing = self.missing_atoms();
300
301 let mask: (u64, u64) = if pref_missing == 8 {
303 (0, 0)
304 } else if pref_missing == 4 {
305 (u64::MAX, 0)
306 } else if pref_missing > 4 {
307 (u64::MAX << ((pref_missing - 4) * 16), 0)
308 } else {
309 (u64::MAX, u64::MAX << (pref_missing * 16))
310 };
311
312 let ax = (self.a ^ s.a) & mask.0;
314 let bx = (self.b ^ s.b) & mask.1;
315 ax == 0 && bx == 0
319 }
320}
321
322impl FromStr for Scope {
323 type Err = ParseScopeError;
324
325 fn from_str(s: &str) -> Result<Scope, ParseScopeError> {
326 Scope::new(s)
327 }
328}
329
330impl fmt::Display for Scope {
331 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
332 let s = self.build_string();
333 write!(f, "{}", s)
334 }
335}
336
337impl fmt::Debug for Scope {
338 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
339 let s = self.build_string();
340 write!(f, "<{}>", s)
341 }
342}
343
344impl Serialize for Scope {
345 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
346 where
347 S: Serializer,
348 {
349 let s = self.build_string();
350 serializer.serialize_str(&s)
351 }
352}
353
354impl<'de> Deserialize<'de> for Scope {
355 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
356 where
357 D: Deserializer<'de>,
358 {
359 struct ScopeVisitor;
360
361 impl<'de> Visitor<'de> for ScopeVisitor {
362 type Value = Scope;
363
364 fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
365 formatter.write_str("a string")
366 }
367
368 fn visit_str<E>(self, v: &str) -> Result<Scope, E>
369 where
370 E: Error,
371 {
372 Scope::new(v).map_err(|e| Error::custom(format!("Invalid scope: {:?}", e)))
373 }
374 }
375
376 deserializer.deserialize_str(ScopeVisitor)
377 }
378}
379
/// The strength of a selector match, as produced by
/// [`ScopeStack::does_match`]. Larger values are stronger matches.
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct MatchPower(pub f64);

impl Eq for MatchPower {}

// `Ord` cannot be derived for an `f64` payload, so it is written by hand
// alongside the derived `PartialOrd`.
#[allow(clippy::derive_ord_xor_partial_ord)]
impl Ord for MatchPower {
    /// Orders by the wrapped score.
    ///
    /// Ordinary numbers compare exactly as `f64::partial_cmp` does. A NaN
    /// score never panics: it sorts below every number and compares equal to
    /// another NaN.
    fn cmp(&self, other: &Self) -> Ordering {
        self.0
            .partial_cmp(&other.0)
            // `partial_cmp` is `None` only when at least one side is NaN.
            .unwrap_or_else(|| other.0.is_nan().cmp(&self.0.is_nan()))
    }
}
393
394impl ScopeStack {
395 pub fn new() -> ScopeStack {
396 ScopeStack {
397 clear_stack: Vec::new(),
398 scopes: Vec::new(),
399 }
400 }
401
402 pub fn from_vec(v: Vec<Scope>) -> ScopeStack {
405 ScopeStack {
406 clear_stack: Vec::new(),
407 scopes: v,
408 }
409 }
410
411 #[inline]
412 pub fn push(&mut self, s: Scope) {
413 self.scopes.push(s);
414 }
415
416 #[inline]
417 pub fn pop(&mut self) {
418 self.scopes.pop();
419 }
420
421 pub fn apply(&mut self, op: &ScopeStackOp) -> Result<(), ScopeError> {
425 self.apply_with_hook(op, |_, _| {})
426 }
427
428 #[inline]
436 pub fn apply_with_hook<F>(&mut self, op: &ScopeStackOp, mut hook: F) -> Result<(), ScopeError>
437 where
438 F: FnMut(BasicScopeStackOp, &[Scope]),
439 {
440 match *op {
441 ScopeStackOp::Push(scope) => {
442 self.scopes.push(scope);
443 hook(BasicScopeStackOp::Push(scope), self.as_slice());
444 }
445 ScopeStackOp::Pop(count) => {
446 for _ in 0..count {
447 self.scopes.pop();
448 hook(BasicScopeStackOp::Pop, self.as_slice());
449 }
450 }
451 ScopeStackOp::Clear(amount) => {
452 let cleared = match amount {
453 ClearAmount::TopN(n) => {
454 let to_leave = self.scopes.len() - min(n, self.scopes.len());
456 self.scopes.split_off(to_leave)
457 }
458 ClearAmount::All => {
459 let mut cleared = Vec::new();
460 mem::swap(&mut cleared, &mut self.scopes);
461 cleared
462 }
463 };
464 let clear_amount = cleared.len();
465 self.clear_stack.push(cleared);
466 for _ in 0..clear_amount {
467 hook(BasicScopeStackOp::Pop, self.as_slice());
468 }
469 }
470 ScopeStackOp::Restore => match self.clear_stack.pop() {
471 Some(ref mut to_push) => {
472 for s in to_push {
473 self.scopes.push(*s);
474 hook(BasicScopeStackOp::Push(*s), self.as_slice());
475 }
476 }
477 None => return Err(ScopeError::NoClearedScopesToRestore),
478 },
479 ScopeStackOp::Noop => (),
480 }
481
482 Ok(())
483 }
484
485 pub fn debug_print(&self, repo: &ScopeRepository) {
488 for s in &self.scopes {
489 print!("{} ", repo.to_string(*s));
490 }
491 println!();
492 }
493
494 pub fn bottom_n(&self, n: usize) -> &[Scope] {
498 &self.scopes[0..n]
499 }
500
501 #[inline]
503 pub fn as_slice(&self) -> &[Scope] {
504 &self.scopes[..]
505 }
506
507 #[inline]
509 pub fn len(&self) -> usize {
510 self.scopes.len()
511 }
512
513 #[inline]
514 pub fn is_empty(&self) -> bool {
515 self.len() == 0
516 }
517
518 pub fn does_match(&self, stack: &[Scope]) -> Option<MatchPower> {
540 let mut sel_index: usize = 0;
541 let mut score: f64 = 0.0;
542 for (i, scope) in stack.iter().enumerate() {
543 let sel_scope = self.scopes[sel_index];
544 if sel_scope.is_prefix_of(*scope) {
545 let len = sel_scope.len();
546 score += f64::from(len) * f64::from(ATOM_LEN_BITS * (i as u16)).exp2();
548 sel_index += 1;
549 if sel_index >= self.scopes.len() {
550 return Some(MatchPower(score));
551 }
552 }
553 }
554 None
555 }
556}
557
558impl FromStr for ScopeStack {
559 type Err = ParseScopeError;
560
561 fn from_str(s: &str) -> Result<ScopeStack, ParseScopeError> {
563 let mut scopes = Vec::new();
564 for name in s.split_whitespace() {
565 scopes.push(Scope::from_str(name)?)
566 }
567 Ok(ScopeStack::from_vec(scopes))
568 }
569}
570
571impl fmt::Display for ScopeStack {
572 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
573 for s in &self.scopes {
574 write!(f, "{} ", s)?;
575 }
576 Ok(())
577 }
578}
579
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a scope through the global repository, panicking on bad input.
    fn scope(name: &str) -> Scope {
        Scope::new(name).unwrap()
    }

    /// Parses a whitespace-separated scope list, panicking on bad input.
    fn stack(names: &str) -> ScopeStack {
        use std::str::FromStr;
        ScopeStack::from_str(names).unwrap()
    }

    #[test]
    fn misc() {}

    #[test]
    fn repo_works() {
        let mut repo = ScopeRepository::new();

        // Building the same name twice yields the same scope.
        for name in ["source.php", "source.php.wow.hi.bob.troll.clock.5", ""].iter() {
            assert_eq!(repo.build(name).unwrap(), repo.build(name).unwrap());
        }

        // Names survive a round trip through the packed representation.
        let empty = repo.build("").unwrap();
        assert_eq!(repo.to_string(empty), "");
        let three_atoms = repo.build("source.php.wow").unwrap();
        assert_eq!(repo.to_string(three_atoms), "source.php.wow");

        // Different names yield different scopes.
        assert!(repo.build("source.php").unwrap() != repo.build("source.perl").unwrap());
        assert!(repo.build("source.php").unwrap() != repo.build("source.php.wagon").unwrap());

        // A trailing dot is ignored.
        assert_eq!(
            repo.build("comment.line.").unwrap(),
            repo.build("comment.line").unwrap()
        );
    }

    #[test]
    fn global_repo_works() {
        use std::str::FromStr;
        assert_eq!(scope("source.php"), scope("source.php"));
        assert!(Scope::from_str("1.2.3.4.5.6.7.8").is_ok());
        assert!(Scope::from_str("1.2.3.4.5.6.7.8.9").is_err());
    }

    #[test]
    fn prefixes_work() {
        // (candidate prefix, full scope, expected result)
        let cases: &[(&str, &str, bool)] = &[
            ("1.2.3.4.5.6.7.8", "1.2.3.4.5.6.7.8", true),
            ("1.2.3.4.5.6", "1.2.3.4.5.6.7.8", true),
            ("1.2.3.4", "1.2.3.4.5.6.7.8", true),
            ("1.2.3.4.5.6.a", "1.2.3.4.5.6.7.8", false),
            ("1.2.a.4.5.6.7", "1.2.3.4.5.6.7.8", false),
            ("1.2.a.4.5.6.7", "1.2.3.4.5", false),
            ("1.2.a", "1.2.3.4.5.6.7.8", false),
        ];
        for &(prefix, full, expected) in cases {
            assert_eq!(
                scope(prefix).is_prefix_of(scope(full)),
                expected,
                "{:?} as a prefix of {:?}",
                prefix,
                full
            );
        }
    }

    #[test]
    fn matching_works() {
        // (selector, target stack, expected match power)
        let cases: &[(&str, &str, Option<u64>)] = &[
            ("string", "string.quoted", Some(0o1)),
            ("source", "string.quoted", None),
            ("a.b e.f", "a.b c.d e.f.g", Some(0o202)),
            ("c e.f", "a.b c.d e.f.g", Some(0o210)),
            ("c.d e.f", "a.b c.d e.f.g", Some(0o220)),
            ("a.b c e.f", "a.b c.d e.f.g", Some(0o212)),
            ("a c.d", "a.b c.d e.f.g", Some(0o021)),
            ("a c.d.e", "a.b c.d e.f.g", None),
        ];
        for &(selector, target, expected) in cases {
            assert_eq!(
                stack(selector).does_match(stack(target).as_slice()),
                expected.map(|power| MatchPower(power as f64)),
                "selector {:?} against {:?}",
                selector,
                target
            );
        }
    }
}