org_rust_parser/utils.rs
1use crate::types::Cursor;
2use phf::phf_set;
3
4// Either a whitespace character, -, ., ,, ;, :, !, ?, ', ), }, [, ", or the end of a line.
5static MARKUP_POST: phf::Set<u8> = phf_set! {
6 b'-',
7 b'.',
8 b',',
9 b';',
10 b':',
11 b'!',
12 b'?',
13 b')',
14 b'}',
15 b'[',
16 b'"',
17 b'\'',
18 // whitespace chars
19 b'\n',
20 b' ',
21 b'\t',
22 b'|',
23 b']',
24 b'/',
25 b'*',
26 b'_',
27 b'+',
28};
29
30// Either a whitespace character, -, (, {, ', ", or the beginning of a line.
31static MARKUP_PRE: phf::Set<u8> = phf_set! {
32 b'-',
33 b'(',
34 b'{',
35 b'\'',
36 b'"',
37 // whitespace character
38 b' ',
39 b'\t',
40 // checks for beginning of line
41 b'\n',
42 // // Non Standard
43 b'|',
44 b'[',
45 b'/',
46 b'*',
47 b'_',
48 b'+',
49 b':',
50};
51
// Why add non-standard extenders?
// Org mode syntax allows */abc/* to be defined as both bold and italic
// even though * and / are not in PRE/POST. This works because it clamps to
// the markup first and then parses the contents.
//
// My extensions to PRE/POST are more permissive than the spec, since they allow
// [/abc/] to be interpreted as markup (the surrounding object doesn't have to
// be markup itself). Another example is:
//
// /abc _*one*/
//
// This shouldn't contain a bold object, but with these changes it does. I find
// this behaviour fairly reasonable and don't mind the more permissive markup
// syntax. If other unexpected interactions turn up, however, I'll have to find
// the ending delimiter first and then parse the contents within (which entails
// reading over the contained text twice; not ideal).
68
/// Reinterprets `byte_arr` as a `&str` without re-validating it as UTF-8.
///
/// # Safety contract (crate-internal)
///
/// Callers must only pass slices that are valid UTF-8. The parser is handed a
/// valid UTF-8 string to begin with, so re-validating with
/// [`std::str::from_utf8`] on every call is skipped. Whether that validation
/// would be a measurable cost has not been benchmarked; a slice that is *not*
/// valid UTF-8 indicates an internal bug, and the safe alternative would be
/// unwrapped immediately afterwards anyway.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if `byte_arr` is not valid
/// UTF-8, so that such internal bugs surface instead of causing undefined
/// behaviour. Builds without debug assertions perform no check.
#[inline]
pub(crate) fn bytes_to_str(byte_arr: &[u8]) -> &str {
    debug_assert!(
        std::str::from_utf8(byte_arr).is_ok(),
        "bytes_to_str called on a slice that is not valid UTF-8"
    );
    // SAFETY: per the contract above, `byte_arr` is valid UTF-8 (and this was
    // just verified when debug assertions are enabled).
    unsafe { std::str::from_utf8_unchecked(byte_arr) }
}
81
/// The range of an arbitrary item in the source text, together with the item.
#[derive(Debug, Clone)]
pub struct Match<T> {
    /// Byte offset in the source where the item starts (inclusive).
    pub start: usize,
    /// Byte offset in the source where the item ends (exclusive).
    pub end: usize,
    /// The item that was matched.
    pub obj: T,
}

impl<T> Match<T> {
    /// Returns the slice of `source` covered by this match.
    ///
    /// # Panics
    ///
    /// Panics if `start..end` is not a valid range within `source` or does not
    /// fall on UTF-8 character boundaries.
    #[inline]
    pub fn to_str<'a>(&self, source: &'a str) -> &'a str {
        &source[self.start..self.end]
    }

    /// Returns the number of bytes covered by this match.
    ///
    /// Assumes `start <= end`.
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// Returns `true` if this match covers no bytes.
    pub fn is_empty(&self) -> bool {
        self.start >= self.end
    }
}
100
/// Reports whether `a` and `b` are the same enum variant, ignoring any data
/// the variants carry.
pub(crate) fn variant_eq<T>(a: &T, b: &T) -> bool {
    use std::mem::discriminant;

    let (lhs, rhs) = (discriminant(a), discriminant(b));
    lhs == rhs
}
105
106pub(crate) fn verify_markup(cursor: Cursor, post: bool) -> bool {
107 let before_maybe = cursor.peek_rev(1);
108 let after_maybe = cursor.peek(1);
109
110 if post {
111 // if we're in post, then a character before the markup Must Exist
112 !before_maybe.unwrap().is_ascii_whitespace()
113 && if let Ok(val) = after_maybe {
114 MARKUP_POST.contains(&val)
115 } else {
116 true
117 }
118 } else if let Ok(after) = after_maybe {
119 !after.is_ascii_whitespace()
120 && if let Ok(val) = before_maybe {
121 MARKUP_PRE.contains(&val)
122 } else {
123 // bof is always valid
124 true
125 }
126 } else {
127 // if there's no after, cannot be valid markup
128 false
129 }
130}
131
/// Turns arbitrary text into an identifier-friendly string.
///
/// Spaces become `-`, `_` and `-` are kept as-is, alphanumeric characters are
/// lowercased, and every other character is dropped.
pub(crate) fn id_escape(potential_id: &str) -> String {
    // Slightly over-allocates when characters get dropped, but most inputs
    // are expected to be mostly plain text.
    let mut escaped = String::with_capacity(potential_id.len());
    for chr in potential_id.chars() {
        match chr {
            ' ' => escaped.push('-'),
            '_' | '-' => escaped.push(chr),
            // a single character can lowercase to several characters
            c if c.is_alphanumeric() => escaped.extend(c.to_lowercase()),
            _ => {}
        }
    }
    escaped
}
150
/// Shorthand for extracting a [`crate::Expr`] from a [`crate::Parser`].
///
/// Expands to an expression that walks `$parsed.pool` and yields an `Option`
/// holding a reference to the payload of the first entry whose `obj` is the
/// `Expr::$name` variant, or `None` if there is no such entry.
///
/// The macro refers to `Expr` through `$crate`, so callers do not need to have
/// `Expr` in scope.
///
/// # Example
///
/// ```rust
/// use org_rust_parser as org_parser;
///
/// use org_parser::{expr_in_pool, parse_org};
/// use org_parser::element::Heading;
///
/// let ret_parse = parse_org("* Hello world!\n");
/// let heading_expr: &Heading = expr_in_pool!(ret_parse, Heading).unwrap();
/// ```
#[macro_export]
macro_rules! expr_in_pool {
    ($parsed: ident, $name: ident) => {
        $parsed.pool.iter().find_map(|x| {
            if let $crate::Expr::$name(i) = &x.obj {
                Some(i)
            } else {
                None
            }
        })
    };
}
176
/// Shorthand for extracting a [`crate::Node`] from a [`crate::Parser`].
///
/// Expands to an expression that walks `$parsed.pool` and yields an `Option`
/// holding the first entry whose `obj` is the `Expr::$name` variant, or `None`
/// if there is no such entry.
///
/// The macro refers to `Expr` through `$crate`, so callers do not need to have
/// `Expr` in scope.
///
/// # Example
///
/// ```rust
/// use org_rust_parser as org_parser;
///
/// use org_parser::{node_in_pool, parse_org, Node};
///
/// let ret_parse = parse_org("* Hello world!\n");
/// let heading_node: &Node = node_in_pool!(ret_parse, Heading).unwrap();
/// ```
#[macro_export]
macro_rules! node_in_pool {
    ($parsed: ident, $name: ident) => {
        $parsed
            .pool
            .iter()
            .find(|x| matches!(&x.obj, $crate::Expr::$name(_)))
    };
}