//! # djr 0.0.1
//!
//! Djot parser written in pure Rust.
//!
//! # Documentation
mod event;

use crate::{
    lex::{Lexer, TokenKind, Token},
    string::unicode::is_uri_friendly,
};
pub use event::{Event, LinkType, ListIndexType, ListType, Tag};
use std::collections::HashMap;

/// Streaming Djot parser that yields [`Event`]s via its [`Iterator`] impl.
///
/// Construction ([`Parser::new`]) performs a first pass over the input to
/// collect link and footnote definitions; iteration is the second pass.
pub struct Parser<'a> {
    /// The raw source text being parsed; all labels/definitions borrow from it.
    input: &'a str,
    /// Tokenizer over `input`, freshly re-created after the first pass so the
    /// second pass starts from the beginning.
    lexer: Lexer<'a>,

    /// Link reference definitions collected in the first pass: label -> destination.
    links: HashMap<&'a str, &'a str>,
    /// Footnote definitions collected in the first pass: label -> body tokens
    /// (everything up to the first blank line).
    footnotes: HashMap<&'a str, Vec<Token>>,
}

impl<'a> Parser<'a> {
    /// Creates a parser over `input`.
    ///
    /// Djot allows links and footnotes to be referenced before they are
    /// defined, so we need two passes. This constructor runs the first pass,
    /// which populates the `links` and `footnotes` maps while doing as little
    /// other work as possible; the [`Iterator`] impl is the second pass and
    /// starts from a fresh lexer.
    pub fn new(input: &'a str) -> Self {
        let mut lexer = Lexer::new(input);
        let mut links = HashMap::new();
        let mut footnotes = HashMap::new();

        while let Some(token) = lexer.next() {
            match token.kind {
                TokenKind::CodeFence(level) => {
                    // Skip everything inside the fenced code block so that
                    // definition-looking lines in code are not collected. A
                    // fence is closed by a fence at least as long as the
                    // opening one.
                    for t in lexer.by_ref() {
                        if matches!(t.kind, TokenKind::CodeFence(l) if l >= level) {
                            break;
                        }
                    }
                }

                TokenKind::LinkDefinition => {
                    // Strip the `[` in the beginning and the `]: ` at the end.
                    let label = &input[token.range.start + 1..token.range.end - 3];

                    // The destination is the run of `Str` tokens immediately
                    // following the definition token. `by_ref()` keeps the
                    // lexer usable afterwards (plain `take_while` would move
                    // it out of the loop). If the run is empty, the
                    // destination is the empty slice rather than a panic.
                    // NOTE(review): `take_while` also consumes and drops the
                    // first non-`Str` token that ends the run — confirm that
                    // is acceptable for the second pass.
                    let start_def = token.range.end;
                    let end_def = lexer
                        .by_ref()
                        .take_while(|t| matches!(t.kind, TokenKind::Str))
                        .last()
                        .map_or(start_def, |t| t.range.end);
                    let def = &input[start_def..end_def];

                    // We won't bother doing a proper URI check. Just check that
                    // all the characters are valid.
                    if def.bytes().all(is_uri_friendly) {
                        links.insert(label, def);
                    }
                }

                TokenKind::FootnoteDefinition => {
                    // Strip the `[^` in the beginning and the `]: ` at the end.
                    let label = &input[token.range.start + 2..token.range.end - 3];

                    // The footnote body is every token up to the first blank
                    // line (block continuation).
                    let mut tokens = Vec::new();
                    for t in lexer.by_ref() {
                        if let TokenKind::Blankline = t.kind {
                            break;
                        }
                        tokens.push(t);
                    }

                    footnotes.insert(label, tokens);
                }

                _ => {}
            }
        }

        Self {
            input,
            // Fresh lexer so the second pass re-reads from the start.
            lexer: Lexer::new(input),
            links,
            footnotes,
        }
    }
}

impl<'a> Iterator for Parser<'a> {
    type Item = Event<'a>;

    /// Second pass: yields parse [`Event`]s.
    ///
    /// Currently a stub that always returns `None`; the `links` and
    /// `footnotes` maps gathered by [`Parser::new`] are not consulted yet.
    fn next(&mut self) -> Option<Self::Item> {
        None
    }
}