1use std::borrow::Cow;
2use std::fmt::Display;
3
4use crate::constants::{
5 BACKSLASH, COLON, HYPHEN, LANGLE, LBRACK, LPAREN, POUND, RANGLE, RBRACK, RPAREN, SLASH,
6};
7use crate::node_pool::NodeID;
8use crate::parse::parse_object;
9use crate::types::{Cursor, MarkupKind, MatchError, ParseOpts, Parseable, Parser, Result};
10use crate::utils::Match;
11
/// Link protocols accepted by the plain (`PROTOCOL:PATH`) and angle
/// (`<PROTOCOL:PATH>`) link parsers, listed in the order they are tried.
const ORG_LINK_PARAMETERS: [&str; 9] = [
    "shell",
    "news",
    "mailto",
    "https",
    "http",
    "ftp",
    "help",
    "file",
    "elisp",
];
15
/// A bracketed link: `[[PATH]]` or `[[PATH][DESCRIPTION]]`.
#[derive(Debug, Clone)]
pub struct RegularLink<'a> {
    /// The classified link target, wrapped with the start/end offsets of the
    /// path text between the opening `[[` and the first unescaped `]`.
    pub path: Match<PathReg<'a>>,
    /// IDs of the nodes parsed from the description part; `None` when the
    /// link was written without one (`[[PATH]]`).
    pub description: Option<Vec<NodeID>>,
}
26
27impl Display for PathReg<'_> {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 match self {
30 PathReg::PlainLink(link) => {
31 f.write_fmt(format_args!("{}:{}", link.protocol, link.path))
32 }
33 PathReg::Id(inner) => f.write_fmt(format_args!("id:{inner}")),
34 PathReg::CustomId(inner) => f.write_fmt(format_args!("#{inner}")),
35 PathReg::Coderef(inner) => f.write_fmt(format_args!("({inner})")),
36 PathReg::Unspecified(inner) => f.write_fmt(format_args!("{inner}")),
37 PathReg::File(inner) => f.write_fmt(format_args!("file:{inner}")),
38 }
39 }
40}
41
/// A `PROTOCOL:PATH` link, as built by `parse_plain_link` and
/// `parse_angle_link`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlainLink<'a> {
    /// The text before the colon; the parsers in this file take it from
    /// `ORG_LINK_PARAMETERS`.
    pub protocol: Cow<'a, str>,
    /// The text after the colon (e.g. `//swag.org` for `https://swag.org`).
    pub path: Cow<'a, str>,
}
47
48impl From<&PlainLink<'_>> for String {
49 fn from(value: &PlainLink) -> Self {
50 format!("{}:{}", value.protocol, value.path)
51 }
52}
53
/// The target of a regular link, classified by how its text begins.
#[derive(Debug, Clone)]
pub enum PathReg<'a> {
    /// A `PROTOCOL:PATH` target accepted by `parse_plain_link`.
    PlainLink(PlainLink<'a>),
    /// An `id:` target; holds the text after the `id:` prefix.
    Id(&'a str),
    /// A `#` target; holds the text after the `#`.
    CustomId(&'a str),
    /// A target written in parentheses; holds the text after the opening `(`.
    Coderef(&'a str),
    /// A `file:` target; holds the text after the `file:` prefix.
    File(Cow<'a, str>),
    /// Fallback when no other form applies; holds the whole target text.
    Unspecified(Cow<'a, str>),
}
71
72impl<'a> PathReg<'a> {
73 fn new(cursor: Cursor<'a>) -> Self {
74 match cursor.curr() {
75 b'i' => {
76 if let Ok(id) = PathReg::parse_id(cursor) {
77 return PathReg::Id(id);
78 } else if let Ok(link) = parse_plain_link(cursor) {
79 return PathReg::PlainLink(link.obj);
80 }
81 }
82 b'f' => {
83 if let Ok(file_path) = PathReg::parse_file(cursor) {
84 return PathReg::File(file_path.into());
85 } else if let Ok(link) = parse_plain_link(cursor) {
86 return PathReg::PlainLink(link.obj);
87 }
88 }
89 POUND => {
90 return PathReg::CustomId(cursor.clamp(cursor.index + 1, cursor.len()));
92 }
93 LPAREN => {
94 if cursor[cursor.len() - 1] == RPAREN {
96 return PathReg::Coderef(cursor.clamp(cursor.index + 1, cursor.len()));
97 }
98 }
99 chr => {
100 if let Ok(link) = parse_plain_link(cursor) {
101 return PathReg::PlainLink(link.obj);
102 }
103 }
104 }
105 return PathReg::Unspecified(cursor.clamp_forwards(cursor.len()).into());
112 }
113
114 fn parse_id(mut cursor: Cursor<'a>) -> Result<&'a str> {
115 cursor.word("id:")?;
116 let begin_id = cursor.index;
117
118 while let Ok(num) = cursor.try_curr() {
119 if !num.is_ascii_hexdigit() || num == HYPHEN {
120 return Err(MatchError::InvalidLogic);
121 }
122 cursor.next();
123 }
124
125 return Ok(cursor.clamp_backwards(begin_id));
126 }
127
128 fn parse_file(mut cursor: Cursor<'a>) -> Result<&'a str> {
129 cursor.word("file:")?;
130 let begin_id = cursor.index;
131
132 while let Ok(num) = cursor.try_curr() {
133 cursor.next();
134 }
135
136 return Ok(cursor.clamp_backwards(begin_id));
137 }
138}
139
140impl<'a> Parseable<'a> for RegularLink<'a> {
141 fn parse(
142 parser: &mut Parser<'a>,
143 mut cursor: Cursor<'a>,
144 parent: Option<NodeID>,
145 mut parse_opts: ParseOpts,
146 ) -> Result<NodeID> {
147 let start = cursor.index;
148 cursor.word("[[")?;
149
150 loop {
152 match cursor.try_curr()? {
153 BACKSLASH => {
154 if let BACKSLASH | LBRACK | RBRACK = cursor.peek(1)? {
156 cursor.advance(2);
157 } else {
158 return Err(MatchError::InvalidLogic);
159 }
160 }
161 RBRACK => {
162 if cursor.index == start + 2 {
164 return Err(MatchError::InvalidLogic);
165 }
166
167 if LBRACK == cursor.peek(1)? {
168 let path_reg_end = cursor.index;
169
170 cursor.advance(2);
172 parse_opts.from_object = false;
173 parse_opts.markup.insert(MarkupKind::Link);
174
175 let mut content_vec: Vec<NodeID> = Vec::new();
176 loop {
177 match parse_object(parser, cursor, parent, parse_opts) {
178 Ok(id) => {
179 cursor.index = parser.pool[id].end;
180 content_vec.push(id);
181 }
182 Err(MatchError::MarkupEnd(kind)) => {
183 if !kind.contains(MarkupKind::Link) {
184 return Err(MatchError::InvalidLogic);
186 }
187
188 let reg_curs = cursor.clamp_off(start + 2, path_reg_end);
189 let pathreg = Match {
190 start: start + 2,
191 end: path_reg_end,
192 obj: PathReg::new(reg_curs),
193 };
194
195 let new_id = parser.pool.reserve_id();
198 for id in &mut content_vec {
199 parser.pool[*id].parent = Some(new_id);
200 }
201
202 return Ok(parser.alloc_with_id(
203 Self {
204 path: pathreg,
205 description: Some(content_vec),
206 },
207 start,
208 cursor.index + 2, parent,
210 new_id,
211 ));
212 }
213 ret @ Err(_) => return ret,
214 }
215 }
216 } else if RBRACK == cursor.peek(1)? {
217 let reg_curs = cursor.clamp_off(start + 2, cursor.index);
220 let pathreg = Match {
221 start: start + 2,
222 end: cursor.index,
223 obj: PathReg::new(reg_curs),
224 };
225
226 return Ok(parser.alloc(
227 Self {
228 path: pathreg,
229 description: None,
230 },
231 start,
232 cursor.index + 2,
233 parent,
234 ));
235 } else {
236 return Err(MatchError::InvalidLogic);
237 }
238 }
239 _ => {}
240 }
241 cursor.next();
242 }
243 }
244}
245
246pub(crate) fn parse_plain_link(mut cursor: Cursor<'_>) -> Result<Match<PlainLink<'_>>> {
263 if let Ok(pre_byte) = cursor.peek_rev(1) {
264 if pre_byte.is_ascii_alphanumeric() {
265 return Err(MatchError::InvalidLogic);
266 }
267 }
268 let start = cursor.index;
269
270 for (i, &protocol) in ORG_LINK_PARAMETERS.iter().enumerate() {
271 if cursor.word(protocol).is_ok() {
274 if cursor.try_curr()? == COLON {
275 cursor.next();
276 let path_start = cursor.index;
277 while let Ok(byte) = cursor.try_curr() {
280 match byte {
281 RANGLE | LPAREN | RPAREN | LANGLE | b'\t' | b'\n' | b'\x0C' | b'\r'
282 | b' ' => {
283 break;
284 }
285 _ => {
287 cursor.next();
288 }
289 }
290 }
291
292 let last_link_byte = cursor[cursor.index - 1];
293 while !cursor.peek_rev(1)?.is_ascii_alphanumeric() && cursor.peek_rev(1)? != SLASH {
306 cursor.prev();
307 if cursor.index <= path_start {
308 return Err(MatchError::InvalidLogic);
309 }
310 }
311
312 if if let Ok(future_byte) = cursor.try_curr() {
313 !future_byte.is_ascii_alphanumeric()
314 } else {
315 true
316 } {
317 return Ok(Match {
318 start,
319 end: cursor.index,
320 obj: PlainLink {
321 protocol: protocol.into(),
322 path: cursor.clamp_backwards(path_start).into(),
323 },
324 });
325 } else {
326 return Err(MatchError::EofError);
327 }
328 } else {
329 cursor.index -= protocol.len();
330 }
331 }
332 }
333
334 Err(MatchError::InvalidLogic)
335}
336
337pub(crate) fn parse_angle_link<'a>(
338 parser: &mut Parser<'a>,
339 mut cursor: Cursor<'a>,
340 parent: Option<NodeID>,
341 parse_opts: ParseOpts,
342) -> Result<NodeID> {
343 let start = cursor.index;
344
345 cursor.next();
346
347 for (i, &protocol) in ORG_LINK_PARAMETERS.iter().enumerate() {
348 if cursor.word(protocol).is_ok() {
349 if cursor.try_curr()? == COLON {
350 cursor.next();
351 let path_start = cursor.index;
352 while let Ok(byte) = cursor.try_curr() {
353 match byte {
354 RBRACK | LANGLE | b'\n' => return Err(MatchError::InvalidLogic),
355 RANGLE => break,
356 _ => {
357 cursor.next();
358 }
359 }
360 }
361
362 return Ok(parser.alloc(
365 PlainLink {
366 protocol: protocol.into(),
367 path: cursor.clamp_backwards(path_start).into(),
368 },
369 start,
370 cursor.index + 1, parent,
372 ));
373 } else {
374 cursor.index -= protocol.len();
375 }
376 }
377 }
378
379 Err(MatchError::InvalidLogic)
380}
381
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use crate::expr_in_pool;
    use crate::object::PlainLink;
    use crate::parse_org;
    use crate::types::Expr;

    /// Parses `input` and checks that the `PlainLink` located by
    /// `expr_in_pool!` has the given protocol and path.
    fn assert_plain_link(input: &str, protocol: &str, path: &str) {
        let parsed = parse_org(input);
        let link = expr_in_pool!(parsed, PlainLink).unwrap();

        assert_eq!(
            link,
            &PlainLink {
                protocol: protocol.into(),
                path: path.into()
            }
        )
    }

    /// Parses `input` and prints the resulting tree; the test passes as long
    /// as parsing and printing do not panic.
    fn parse_and_print(input: &str) {
        let pool = parse_org(input);
        pool.print_tree();
    }

    #[test]
    fn basic_plain_link() {
        assert_plain_link("https://swag.org", "https", "//swag.org");
    }

    #[test]
    fn plain_link_subprotocol() {
        assert_plain_link("http://swag.org", "http", "//swag.org");
    }

    #[test]
    fn plain_link_after() {
        assert_plain_link("http://swag.com meow", "http", "//swag.com");
    }

    #[test]
    fn plain_link_ws_end() {
        assert_plain_link(" mailto:swag@cool.com ", "mailto", "swag@cool.com");
    }

    #[test]
    fn plain_link_word_constituent() {
        assert_plain_link(
            " https://one_two_three_https______..............~~~! ",
            "https",
            "//one_two_three_https",
        );
    }

    #[test]
    fn plain_link_word_constituent_slash() {
        assert_plain_link(
            " https://one_two_three_https______/..............~~~! ",
            "https",
            "//one_two_three_https______/",
        );
    }

    #[test]
    fn basic_angle_link() {
        assert_plain_link(
            " <https://one two !!@#!OIO DJDFK Jk> ",
            "https",
            "//one two !!@#!OIO DJDFK Jk",
        );
    }

    #[test]
    fn basic_regular_link() {
        parse_and_print("[[hps://.org]]");
    }

    #[test]
    fn regular_link_malformed() {
        parse_and_print(
            "
word
[#A]
",
        );
    }

    #[test]
    fn regular_link_description() {
        parse_and_print(" [[https://meo][cool site]]");
    }

    #[test]
    fn regular_link_unclosed_recursive_markup() {
        parse_and_print(" [[https://meo][cool *site* ~one two~ three *four ]]");
    }

    #[test]
    fn regular_link_unclosed_plain_markup() {
        parse_and_print(" [[https://meo][cool *site* ~one two~ three *four ~five six ]]");
    }

    #[test]
    fn file_link() {
        parse_and_print(
            r"
I'll be skipping over the instrumentals unless there's reason to.

[[file:bmc.jpg]]
** songs
",
        );
    }
}