md_inc/
parse.rs

1use crate::config::{DEFAULT_END_COMMAND, DEFAULT_TAG_BEGIN, DEFAULT_TAG_END};
2use anyhow::{Context, Result};
3use nom::bytes::complete::{take, take_until, take_while};
4use nom::character::complete::{none_of, space0, space1};
5use nom::character::is_alphanumeric;
6use nom::combinator::map;
7use nom::multi::{count, fold_many0, many0, many_till, separated_nonempty_list};
8use nom::sequence::{delimited, pair, separated_pair, tuple};
9use nom::{
10    branch::alt,
11    bytes::complete::{escaped_transform, tag},
12    character::complete::char,
13    IResult,
14};
15use std::path::PathBuf;
16
/// A single parsed command: a name followed by zero or more arguments.
///
/// Both the name and the arguments are slices borrowed from the parsed text.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Command<'a> {
    /// The command name.
    command: &'a str,
    /// The arguments given to the command, in the order they were written.
    args: Vec<&'a str>,
}

impl<'a> Command<'a> {
    /// Builds a command from its name and its argument list.
    pub fn new(command: &'a str, args: Vec<&'a str>) -> Self {
        Command { command, args }
    }
}
27#[derive(Debug, Clone, PartialEq)]
28struct CommandSec<'a> {
29    commands: Vec<Command<'a>>,
30    start_remaining: usize,
31    end_remaining: usize,
32}
33
34impl CommandSec<'_> {
35    pub fn start(&self, input: &str) -> usize {
36        input.len() - self.start_remaining
37    }
38    pub fn end(&self, input: &str) -> usize {
39        input.len() - self.end_remaining
40    }
41}
42
43fn wrapped_string(input: &str) -> IResult<&str, &str> {
44    let (i, cnt) = fold_many0(tag("#"), 0, |x, _| x + 1)(input)?;
45    let (i, _) = tag("\"")(i)?;
46    let end = pair(tag("\""), count(tag("#"), cnt));
47    let (i, (inner, _)) = many_till(take(1u32), end)(i)?;
48    let offset = cnt + 1;
49    Ok((i, &input[offset..offset + inner.len()]))
50}
51
52static EXTRA_STRING_CHARS: &'static [u8] = "/\\-_.".as_bytes();
53
54fn is_string_char(i: char) -> bool {
55    if i.is_ascii() {
56        let i = i as u8;
57        is_alphanumeric(i) || EXTRA_STRING_CHARS.contains(&i)
58    } else {
59        false
60    }
61}
62
63fn inner_string(i: &str) -> IResult<&str, &str> {
64    take_while(is_string_char)(i)
65}
66
67fn maybe_wrapped_string(i: &str) -> IResult<&str, &str> {
68    alt((wrapped_string, inner_string))(i)
69}
70
71fn command_args(i: &str) -> IResult<&str, Vec<&str>> {
72    let (i, _) = space0(i)?;
73    let (i, mut res) = separated_nonempty_list(space1, maybe_wrapped_string)(i)?;
74    if matches!(res.last(), Some(x) if x.is_empty()) {
75        res.pop();
76    }
77    Ok((i, res))
78}
79
80fn command<'a>(i: &'a str) -> IResult<&'a str, Command> {
81    alt((
82        map(
83            separated_pair(
84                maybe_wrapped_string,
85                tuple((space0, tag(":"), space0)),
86                command_args,
87            ),
88            move |(command, args): (&'a str, Vec<&'a str>)| Command::new(command, args),
89        ),
90        map(maybe_wrapped_string, move |command| {
91            Command::new(command, vec![])
92        }),
93    ))(i)
94}
95
96fn command_block<'a>(tags: &'a CommandTags, input: &'a str) -> IResult<&'a str, CommandSec<'a>> {
97    let start_remaining = input.len();
98    let (i, _open) = tag(tags.opening.as_str())(input)?;
99    let (i, command_1) = delimited(space0, command, space0)(i)?;
100    let (rest, (mut other_commands, _end)) = many_till(
101        delimited(delimited(space0, char('|'), space0), command, space0),
102        tag(tags.closing.as_str()),
103    )(i)?;
104
105    let mut commands = vec![command_1];
106    commands.append(&mut other_commands);
107    let end_remaining = rest.len();
108    Ok((
109        rest,
110        CommandSec {
111            commands,
112            start_remaining,
113            end_remaining,
114        },
115    ))
116}
117
118// fn end_block(i: &str, end_command: &str) -> IResult<&str, &str> {
119//     tuple((tag(begin), space0, tag(end_command), space0, tag(end)))(i)
120// }
121
122fn next_command_block<'a>(
123    tags: &'a CommandTags,
124) -> impl Fn(&'a str) -> IResult<&'a str, CommandSec<'a>> {
125    move |i: &'a str| -> IResult<&'a str, CommandSec<'a>> {
126        let mut input = i;
127        loop {
128            // Skip to next match...
129            let (i, _) = take_until::<&'a str, &'a str, (&'a str, nom::error::ErrorKind)>(
130                &tags.opening,
131            )(input)?;
132            if let Ok(x) = command_block(tags, i) {
133                return Ok(x);
134            }
135            input = &input[1..];
136        }
137    }
138}
/// The pair of delimiters that enclose a command block.
#[derive(Clone, Debug)]
pub struct CommandTags {
    /// Text that opens a command block.
    pub opening: String,
    /// Text that closes a command block.
    pub closing: String,
}

impl CommandTags {
    /// Creates a tag pair from anything convertible into a `String`.
    ///
    /// Both arguments must be of the same type.
    pub fn new<S: Into<String>>(opening: S, closing: S) -> Self {
        let opening: String = opening.into();
        let closing: String = closing.into();
        CommandTags { opening, closing }
    }
}
152
153#[derive(Clone, Debug)]
154pub struct ParserConfig {
155    pub tags: CommandTags,
156    pub end_command: String,
157    pub base_dir: PathBuf,
158}
159
160impl Default for ParserConfig {
161    fn default() -> Self {
162        ParserConfig {
163            tags: CommandTags::new(DEFAULT_TAG_BEGIN, DEFAULT_TAG_END),
164            end_command: DEFAULT_END_COMMAND.to_string(),
165            base_dir: std::env::current_dir().unwrap(),
166        }
167    }
168}
169
/// A piece of the output document.
pub(crate) enum Span {
    /// A `(start, end)` byte range to copy unchanged from the original content.
    Existing((usize, usize)),
    /// Newly generated text to insert.
    Replace(String),
}
174
175// pub fn get_args<S: AsRef<str>>(raw_args: S) -> S {
176//     // Group into quotes (and remove escapes)
177//     let re = regex::Regex::new(r#""()""#)
178// }
179
180pub fn escaped<'a>(input: &'a str) -> String {
181    let get = move |input: &'a str| -> IResult<&'a str, String> {
182        use nom::{alt, tag};
183        escaped_transform(none_of("\\"), '\\', |i: &str| {
184            alt!(i,
185                tag!("\\")       => { |_| "\\" }
186              | tag!("\"")       => { |_| "\"" }
187              | tag!("n")        => { |_| "\n" }
188            )
189        })(input)
190    };
191    get(input)
192        .map(|(_, x)| x)
193        .unwrap_or_else(|_err| String::new())
194}
195
196pub(crate) fn transform<S: AsRef<str>>(input: S, command: &Command) -> Result<String> {
197    let input = input.as_ref();
198    let args = &command.args;
199    let command = command.command;
200    Ok(match command {
201        "code" => match args.first() {
202            Some(language) => format!("```{}\n{}\n```", language, input),
203            _ => format!("```\n{}\n```", input),
204        },
205        "lines" => {
206            let from_line = args
207                .get(0)
208                .map(|x| x.parse().context("Invalid 'from' line"))
209                .unwrap_or(Ok(1))?
210                - 1;
211            let to_line = args
212                .get(1)
213                .map(|x| x.parse().context("Invalid 'to' line"))
214                .unwrap_or(Ok(input.len()))?;
215            (&input
216                .lines()
217                .skip(from_line)
218                .take(to_line - from_line)
219                .collect::<Vec<&str>>())
220                .join("\n")
221        }
222        "line" => args
223            .iter()
224            .map(|x| -> Result<String> {
225                let line = x.parse::<usize>().context("Invalid line")? - 1;
226                Ok(input
227                    .lines()
228                    .skip(line)
229                    .next()
230                    .context("Missing line")?
231                    .to_string())
232            })
233            .collect::<Result<Vec<String>>>()?
234            .join("\n"),
235        "line-numbers" => {
236            let separator = args.get(0).unwrap_or(&": ");
237            let width = args
238                .get(1)
239                .and_then(|x| x.parse::<usize>().ok())
240                .unwrap_or_else(|| input.lines().count().to_string().len());
241
242            input
243                .lines()
244                .enumerate()
245                .map(|(i, x)| match i {
246                    0 => format!("{:>w$}{}{}", i + 1, separator, x, w = width),
247                    _ => format!("\n{:>w$}{}{}", i + 1, separator, x, w = width),
248                })
249                .collect::<String>()
250        }
251        "wrap" => {
252            let before = args.get(0).context("Missing 'before' wrap argument")?;
253            let after = args.get(1).unwrap_or(before);
254            format!("{}{}{}", escaped(before), input, escaped(after))
255        }
256        "wrap-lines" => {
257            let before = args.get(0).context("Missing 'before' wrap argument")?;
258            let after = args.get(1).unwrap_or(before);
259            let before = escaped(before);
260            let after = escaped(after);
261            input
262                .lines()
263                .enumerate()
264                .flat_map(|(i, x)| match i {
265                    0 => vec![before.as_str(), x, after.as_str()],
266                    _ => vec!["\n", before.as_str(), x, after.as_str()],
267                })
268                .collect()
269        }
270        "match" => {
271            let re = args.get(0).context("Missing regex string given")?.as_ref();
272            let re = regex::Regex::new(re)?;
273            let group = args
274                .get(1)
275                .map(|&x| x.parse().context("Invalid group number"))
276                .unwrap_or(Ok(0))?; // Captrue all if no group specified
277            let m = re.captures(input).context("Could not find match")?;
278            let group = m
279                .get(group)
280                .with_context(|| format!("Only {} groups in match", m.len()))?;
281            input[group.start()..group.end()].to_string()
282        }
283
284        // Todo:
285        // Structured data (Csv, Json...) - row & column sorting, filtering, into table
286        _ => input.to_string(), // No transforms
287    })
288}
289
290pub struct Parser {
291    pub config: ParserConfig,
292    pub content: String,
293}
294
295impl Parser {
296    pub fn new(config: ParserConfig, content: String) -> Self {
297        Self { config, content }
298    }
299
300    fn command_blocks(&self) -> IResult<&str, Vec<CommandSec>> {
301        let cmd = next_command_block(&self.config.tags);
302        many0(cmd)(&self.content)
303    }
304
305    fn command_groups(&self) -> Result<Vec<(CommandSec, CommandSec)>> {
306        let (_, commands) = match self.command_blocks() {
307            Ok(x) => x,
308            Err(err) => {
309                return Err(anyhow::anyhow!("Failed parsing: {}", format!("{}", err)));
310            }
311        };
312
313        let mut begin_blocks: Vec<CommandSec> = vec![];
314        let mut end_blocks: Vec<CommandSec> = vec![];
315        for command in commands {
316            match command.commands.first() {
317                Some(x) if x.command == self.config.end_command => end_blocks.push(command),
318                _ => begin_blocks.push(command),
319            }
320        }
321
322        if begin_blocks.len() != end_blocks.len() {
323            return Err(anyhow::anyhow!(
324                "Mismatch between command count ({}) and end count ({})",
325                begin_blocks.len(),
326                end_blocks.len()
327            ));
328        }
329        begin_blocks
330            .into_iter()
331            .zip(end_blocks)
332            .map(|(begin, end)| {
333                if begin.end(&self.content) >= end.start(&self.content) {
334                    return Err(anyhow::anyhow!(
335                        "Found extra end block before command: ({})",
336                        begin
337                            .commands
338                            .into_iter()
339                            .map(|x| format!("{:?}", x))
340                            .collect::<Vec<_>>()
341                            .join(" | ")
342                    ));
343                }
344                Ok((begin, end))
345            })
346            .collect::<Result<Vec<_>>>()
347    }
348
349    ///
350    ///
351    /// # Parameters
352    /// * relative_filepath A filepath relative to `dir`
353    pub fn parse(&self) -> Result<String> {
354        let groups = self.command_groups()?;
355        let mut prev_end = 0;
356        let mut spans: Vec<Span> = vec![];
357        for (begin, end) in groups {
358            let filename = begin.commands.get(0).context("No filename")?.command;
359            let filename = self.config.base_dir.join(&filename);
360            let contents = std::fs::read_to_string(&filename).with_context(|| {
361                let before = begin.start(&self.content);
362                let line_num = self.content[0..before].lines().count();
363
364                format!(
365                    "On line {}: Could not read '{:?}'\n  while parsing: {:?}",
366                    line_num,
367                    &filename,
368                    &self.content[begin.start(&self.content)..begin.end(&self.content)],
369                )
370            })?;
371            spans.push(Span::Existing((prev_end, begin.end(&self.content))));
372            let mut output = contents.trim().to_string();
373            for cmd in begin.commands.iter().skip(1) {
374                output = transform(&output, &cmd)?;
375            }
376            spans.push(Span::Replace(output));
377            prev_end = end.start(&self.content);
378        }
379        spans.push(Span::Existing((prev_end, self.content.len())));
380
381        Ok(spans
382            .iter()
383            .map(|x| -> &str {
384                match x {
385                    Span::Existing((begin, end)) => &self.content[*begin..*end],
386                    Span::Replace(text) => text.as_str(),
387                }
388            })
389            .collect::<Vec<&str>>()
390            .join("\n"))
391    }
392}
393
394#[cfg(test)]
395mod test {
396    use super::*;
397
398    #[test]
399    fn strings() {
400        assert_eq!(wrapped_string(r#""hello"X"#), Ok(("X", "hello")));
401        assert_eq!(wrapped_string(r##"#"hello"#X"##), Ok(("X", "hello")));
402        assert_eq!(wrapped_string(r###"##"hello"##X"###), Ok(("X", "hello")));
403        assert_eq!(maybe_wrapped_string(r#""hello"X"#), Ok(("X", "hello")));
404        assert_eq!(
405            maybe_wrapped_string(r#"abc/def\ghi.txt|"#),
406            Ok(("|", "abc/def\\ghi.txt"))
407        );
408        assert_eq!(maybe_wrapped_string(r##"#"hello"#X"##), Ok(("X", "hello")));
409        assert_eq!(
410            maybe_wrapped_string(r##"#"using " is ok"#X"##),
411            Ok(("X", "using \" is ok"))
412        );
413    }
414
415    #[test]
416    fn test_command_args() {
417        assert_eq!(
418            command_args(r#"one two "three 3" |"#),
419            Ok(("|", vec!["one", "two", "three 3"]))
420        );
421        assert_eq!(command_args(r#" one |"#), Ok(("|", vec!["one"])));
422        assert_eq!(command_args(r#" " one " |"#), Ok(("|", vec![" one "])));
423        assert_eq!(
424            command_args(r#" one "two  three"   | "#),
425            Ok(("| ", vec!["one", "two  three"]))
426        );
427    }
428    #[test]
429    fn test_command() {
430        assert_eq!(
431            command("cmd"),
432            Ok((
433                "",
434                Command {
435                    command: "cmd",
436                    args: vec![]
437                }
438            ))
439        );
440        assert_eq!(
441            command(r#"cmd: a b "c d""#),
442            Ok((
443                "",
444                Command {
445                    command: "cmd",
446                    args: vec!["a", "b", "c d"]
447                }
448            ))
449        );
450    }
451
452    #[test]
453    fn test_command_block() {
454        let tags = CommandTags::new("{{", "}}");
455
456        assert_eq!(
457            command_block(&tags, "{{ cmd1: a1 | cmd2: a2 a2.1 }} X"),
458            Ok((
459                " X",
460                CommandSec {
461                    start_remaining: 32,
462                    end_remaining: 2,
463                    commands: vec![
464                        Command {
465                            command: "cmd1",
466                            args: vec!["a1"]
467                        },
468                        Command {
469                            command: "cmd2",
470                            args: vec!["a2", "a2.1"]
471                        },
472                    ]
473                }
474            ))
475        );
476        let tags = CommandTags::new("<!--{{", "}}-->");
477        assert_eq!(
478            command_block(&tags, "<!--{{ cmd1: a1 | cmd2: a2 a2.1 }}--> X"),
479            Ok((
480                " X",
481                CommandSec {
482                    start_remaining: 39,
483                    end_remaining: 2,
484                    commands: vec![
485                        Command {
486                            command: "cmd1",
487                            args: vec!["a1"]
488                        },
489                        Command {
490                            command: "cmd2",
491                            args: vec!["a2", "a2.1"]
492                        },
493                    ]
494                }
495            ))
496        );
497    }
498    #[test]
499    fn test_command_block_with_pipe_ends() {
500        let tags = CommandTags::new("(|", "|)");
501        assert_eq!(
502            command_block(&tags, "(| cmd1: a1 | cmd2: a2 a2.1 |) X"),
503            Ok((
504                " X",
505                CommandSec {
506                    start_remaining: 32,
507                    end_remaining: 2,
508                    commands: vec![
509                        Command {
510                            command: "cmd1",
511                            args: vec!["a1"]
512                        },
513                        Command {
514                            command: "cmd2",
515                            args: vec!["a2", "a2.1"]
516                        },
517                    ]
518                }
519            ))
520        );
521        let tags = CommandTags::new("(|", "|)");
522        assert_eq!(
523            command_block(&tags, r#"(|cmd1: a1|cmd2: a2 "a2 |) 3 4"|) X"#),
524            Ok((
525                " X",
526                CommandSec {
527                    start_remaining: 35,
528                    end_remaining: 2,
529                    commands: vec![
530                        Command {
531                            command: "cmd1",
532                            args: vec!["a1"]
533                        },
534                        Command {
535                            command: "cmd2",
536                            args: vec!["a2", "a2 |) 3 4"]
537                        },
538                    ]
539                }
540            ))
541        );
542    }
543    #[test]
544    fn test_next_command_block() {
545        let tags = CommandTags::new("(|", "|)");
546        assert_eq!(
547            next_command_block(&tags)(r#"A(|cmd|)Z"#),
548            Ok((
549                "Z",
550                CommandSec {
551                    start_remaining: 8,
552                    end_remaining: 1,
553                    commands: vec![Command {
554                        command: "cmd",
555                        args: vec![]
556                    },]
557                }
558            ))
559        );
560        assert_eq!(
561            next_command_block(&tags)(r#"A(|B(|cmd|)Z"#),
562            Ok((
563                "Z",
564                CommandSec {
565                    start_remaining: 8,
566                    end_remaining: 1,
567                    commands: vec![Command {
568                        command: "cmd",
569                        args: vec![]
570                    },]
571                }
572            ))
573        );
574        let res = next_command_block(&tags)(r#"A(|nothing"#);
575        assert!(res.is_err());
576    }
577    #[test]
578    fn test_command_blocks() {
579        let parser = Parser {
580            config: ParserConfig {
581                tags: CommandTags::new("(|", "|)"),
582                end_command: "end".to_string(),
583                base_dir: PathBuf::new(),
584            },
585            content: r#"A(|cmd|)X(|(|end|)Z"#.to_string(),
586        };
587        let blocks = parser.command_blocks();
588        assert_eq!(
589            blocks,
590            Ok((
591                "Z",
592                vec![
593                    CommandSec {
594                        start_remaining: 18,
595                        end_remaining: 11,
596                        commands: vec![Command {
597                            command: "cmd",
598                            args: vec![]
599                        },]
600                    },
601                    CommandSec {
602                        start_remaining: 8,
603                        end_remaining: 1,
604                        commands: vec![Command {
605                            command: "end",
606                            args: vec![]
607                        },]
608                    }
609                ]
610            ))
611        );
612    }
613
614    #[test]
615    fn test_command_groups() {
616        let parser = Parser {
617            config: ParserConfig {
618                tags: CommandTags::new("(|", "|)"),
619                end_command: "end".to_string(),
620                base_dir: PathBuf::new(),
621            },
622            content: r#"A(|cmd|)X(|(|end|)Z"#.to_string(),
623        };
624        let groups = parser.command_groups().unwrap();
625        assert_eq!(
626            groups,
627            vec![(
628                CommandSec {
629                    start_remaining: 18,
630                    end_remaining: 11,
631                    commands: vec![Command {
632                        command: "cmd",
633                        args: vec![]
634                    },]
635                },
636                CommandSec {
637                    start_remaining: 8,
638                    end_remaining: 1,
639                    commands: vec![Command {
640                        command: "end",
641                        args: vec![]
642                    },]
643                }
644            )]
645        );
646    }
647}