marco_core/parser/inlines/
text_parser.rs1use super::shared::{opt_span, GrammarSpan};
7use crate::parser::ast::{Node, NodeKind};
8use nom::bytes::complete::take;
9use nom::IResult;
10use nom::Input;
11use nom::Parser;
12
13pub fn parse_text(input: GrammarSpan) -> IResult<GrammarSpan, Node> {
35 let text_fragment = input.fragment();
36
37 let next_autolink_literal =
41 super::gfm_autolink_literal_parser::find_next_autolink_literal_start(text_fragment)
42 .unwrap_or(text_fragment.len());
43
44 let next_emoji_shortcode =
47 super::marco_emoji_shortcode_parser::find_next_emoji_shortcode_start(text_fragment)
48 .unwrap_or(text_fragment.len());
49
50 let next_platform_mention =
52 super::marco_platform_mentions_parser::find_next_platform_mention_start(text_fragment)
53 .unwrap_or(text_fragment.len());
54
55 let next_special = text_fragment
61 .char_indices()
62 .find_map(|(idx, ch)| match ch {
63 '*' | '_' | '`' | '[' | '<' | '!' | '&' | '\n' | '\\' | '$' => Some(idx),
64 '^' | '~' | '˅' => Some(idx),
65 '=' => {
66 if text_fragment[idx..].starts_with("==") {
67 Some(idx)
68 } else {
69 None
70 }
71 }
72 '-' => {
73 if text_fragment[idx..].starts_with("--") {
74 Some(idx)
75 } else {
76 None
77 }
78 }
79 _ => None,
80 })
81 .unwrap_or(text_fragment.len());
82
83 if next_autolink_literal == 0 {
85 return Err(nom::Err::Error(nom::error::Error::new(
86 input,
87 nom::error::ErrorKind::Verify,
88 )));
89 }
90
91 if next_emoji_shortcode == 0 {
93 return Err(nom::Err::Error(nom::error::Error::new(
94 input,
95 nom::error::ErrorKind::Verify,
96 )));
97 }
98
99 if next_platform_mention == 0 {
101 return Err(nom::Err::Error(nom::error::Error::new(
102 input,
103 nom::error::ErrorKind::Verify,
104 )));
105 }
106
107 let next_special = next_special
108 .min(next_autolink_literal)
109 .min(next_emoji_shortcode)
110 .min(next_platform_mention);
111
112 if next_special == 0 {
113 return Err(nom::Err::Error(nom::error::Error::new(
115 input,
116 nom::error::ErrorKind::Verify,
117 )));
118 }
119
120 let mut text_len = next_special;
123 if next_special < text_fragment.len() && text_fragment[next_special..].starts_with('\n') {
124 let mut trailing_spaces = 0;
126 for ch in text_fragment[..next_special].chars().rev() {
127 if ch == ' ' {
128 trailing_spaces += 1;
129 } else {
130 break;
131 }
132 }
133
134 if trailing_spaces >= 2 {
137 text_len = next_special - trailing_spaces;
138 }
139 }
140
141 if text_len == 0 {
142 return Err(nom::Err::Error(nom::error::Error::new(
144 input,
145 nom::error::ErrorKind::Verify,
146 )));
147 }
148
149 let text_content = input.take(text_len);
151 let rest = input.take_from(text_len);
152
153 let span = opt_span(text_content);
154
155 let node = Node {
156 kind: NodeKind::Text(text_content.fragment().to_string()),
157 span,
158 children: Vec::new(),
159 };
160
161 Ok((rest, node))
162}
163
164pub fn parse_special_as_text(input: GrammarSpan) -> IResult<GrammarSpan, Node> {
177 let text_fragment = input.fragment();
178
179 if text_fragment.is_empty() {
180 return Err(nom::Err::Error(nom::error::Error::new(
181 input,
182 nom::error::ErrorKind::Eof,
183 )));
184 }
185
186 let char_len = if text_fragment.starts_with('`') {
189 text_fragment.chars().take_while(|&c| c == '`').count()
191 } else {
192 text_fragment
193 .chars()
194 .next()
195 .map(|c| c.len_utf8())
196 .unwrap_or(1)
197 };
198
199 let (rest, text_content) = take(char_len).parse(input)?;
200
201 let span = opt_span(text_content);
202
203 let node = Node {
204 kind: NodeKind::Text(text_content.fragment().to_string()),
205 span,
206 children: Vec::new(),
207 };
208
209 Ok((rest, node))
210}
211
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the literal held by a text node and fails the test for any other kind,
    /// so a wrong node kind can never pass silently.
    fn text_of(node: &Node) -> &str {
        match &node.kind {
            NodeKind::Text(text) => text.as_str(),
            _ => panic!("Expected Text node"),
        }
    }

    #[test]
    fn smoke_test_parse_text_basic() {
        let input = GrammarSpan::new("Hello World*");
        let result = parse_text(input);

        assert!(result.is_ok(), "Failed to parse plain text");
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"*");
        assert_eq!(text_of(&node), "Hello World");
    }

    #[test]
    fn smoke_test_parse_text_up_to_special() {
        let input = GrammarSpan::new("text with `code`");
        let result = parse_text(input);

        assert!(result.is_ok());
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"`code`");
        assert_eq!(text_of(&node), "text with ");
    }

    #[test]
    fn smoke_test_parse_text_trailing_spaces() {
        // Two trailing spaces before the newline: they must stay in the remainder.
        let input = GrammarSpan::new("text  \n");
        let result = parse_text(input);

        assert!(result.is_ok());
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"  \n");
        assert_eq!(text_of(&node), "text");
    }

    #[test]
    fn smoke_test_parse_text_single_trailing_space_is_kept() {
        // A single trailing space is below the two-space threshold and stays in the text.
        let input = GrammarSpan::new("text \n");
        let result = parse_text(input);

        assert!(result.is_ok());
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"\n");
        assert_eq!(text_of(&node), "text ");
    }

    #[test]
    fn smoke_test_parse_text_starts_with_special() {
        let input = GrammarSpan::new("*emphasis*");
        let result = parse_text(input);

        assert!(
            result.is_err(),
            "Should not parse text starting with special char"
        );
    }

    #[test]
    fn smoke_test_parse_special_as_text_asterisk() {
        let input = GrammarSpan::new("* not emphasis");
        let result = parse_special_as_text(input);

        assert!(result.is_ok(), "Failed to parse special as text");
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &" not emphasis");
        assert_eq!(text_of(&node), "*");
    }

    #[test]
    fn smoke_test_parse_special_as_text_backticks() {
        let input = GrammarSpan::new("```not code");
        let result = parse_special_as_text(input);

        assert!(result.is_ok());
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"not code");
        assert_eq!(text_of(&node), "```");
    }

    #[test]
    fn smoke_test_parse_special_as_text_multibyte() {
        // A multi-byte leading character must be consumed alone.
        let input = GrammarSpan::new("˅x");
        let result = parse_special_as_text(input);

        assert!(result.is_ok(), "Failed to parse multi-byte special as text");
        let (rest, node) = result.unwrap();

        assert_eq!(rest.fragment(), &"x");
        assert_eq!(text_of(&node), "˅");
    }

    #[test]
    fn smoke_test_parse_text_position() {
        let input = GrammarSpan::new("Hello*");
        let result = parse_text(input);

        assert!(result.is_ok());
        let (_, node) = result.unwrap();

        assert!(node.span.is_some(), "Text should have position info");

        let span = node.span.unwrap();
        assert_eq!(span.start.offset, 0);
        assert_eq!(span.end.offset, 5);
    }
}