//! df_ls_lexical_analysis 0.3.0-rc.1
//!
//! A language server for Dwarf Fortress RAW files.
use super::*;
use df_ls_diagnostics::{DMExtraInfo, DiagnosticMessageSet, DiagnosticsInfo};
use std::collections::HashMap;
use std::rc::Rc;

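/// Tokenizes a complete DF RAW file into a syntax tree.
///
/// Returns the tree together with the diagnostics collected while
/// tokenizing. When `load_diagnostic_messages` is `false`, an empty
/// message set is used instead of loading the lexer messages from file.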
pub fn tokenize_df_raw_file(
    source: String,
    load_diagnostic_messages: bool,
) -> (Rc<Tree>, DiagnosticsInfo) {
    let mut diagnostic_info = if load_diagnostic_messages {
        DiagnosticsInfo::load_from_file(
            DiagnosticMessageSet::Lexer,
            Some("DF RAW Language Server".to_owned()),
        )
    } else {
        DiagnosticsInfo::new(HashMap::new(), Some("DF RAW Language Server".to_owned()))
    };

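    // Create the root `raw_file` node spanning the whole source. Its
    // `end_point` is a placeholder here and is updated after tokenizing.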
    let tree = Rc::new(Tree::default());
    tree.add_tsnode(DataNode {
        id: ROOT_ID,
        kind_id: 0,
        kind: "raw_file".to_owned(),
        name: Some("raw_file".to_owned()),
        start_byte: 0,
        end_byte: source.len(),
        start_point: Point { row: 0, column: 0 },
        end_point: Point { row: 0, column: 0 },

        children_ids: vec![],
        parent_id: None,
        next_sibling_id: None,
        prev_sibling_id: None,
        tree: Rc::downgrade(&tree),
    });
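    // The helper tracks the current position while consuming `source`, and
    // `RegexList` holds the precompiled patterns used during tokenizing.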
    let mut tok_help = TokenizerHelper::new(source, &tree);
    let regex_list = RegexList::new();

    // Start tokenizing the file contents
    if let Err(err) = tokenize_file_content(&mut tok_help, &regex_list, &mut diagnostic_info) {
        // This is not a real error: the `Err` variant is just a way to stop
        // the tokenizer at any point, so it is only logged in debug mode.
        log::debug!("Tokenizer ended with: {}", err);
    }

    // Add EOF (End of File) token
    let eof_token = tok_help.create_start_tsnode("EOF", Some("EOF"));
    tok_help.add_node_to_tree(eof_token, ROOT_ID);

    // Update the root node's end position now that the whole file is tokenized
    let mut root = tree.get_tsnode(ROOT_ID).unwrap();
    root.end_point = tok_help.get_point();
    tree.update_node(ROOT_ID, root);

    tree.finalize_tree();

    (tree, diagnostic_info)
}

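/// Tokenizes the file body after the header.
///
/// The body alternates between comments (any text outside brackets),
/// `[TOKEN_NAME:ARGS]` tokens, and stray `]` brackets, which are stored as
/// `ERROR` nodes with an `unexpected_end_bracket` diagnostic.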
fn tokenize_file_content(
    tok_help: &mut TokenizerHelper,
    regex_list: &RegexList,
    diagnostics: &mut DiagnosticsInfo,
) -> TokenizerResult {
    header::tokenize_header(tok_help, regex_list, diagnostics)?;

    loop {
        // If next char is NOT `[` or `]`
        if !(tok_help.check_if_next_char_matches_any_of(&['[', ']'])) {
            // Consume everything up to the next `[` or `]` as a comment
            let comment = tok_help.get_next_match(
                &regex_list.comment,
                "comment",
                Some("comment"),
                true,
                true,
            );
            match comment {
                TokenMatchStatus::Ok(result) => {
                    tok_help.add_node_to_tree(result, ROOT_ID);
                }
                TokenMatchStatus::OkWithPrefixFound(_prefix, _result) => {
                    unreachable!(
                        "Current Regex of `comment` combined with \
                        `[` or `]` check above makes this unreachable."
                    )
                }
                TokenMatchStatus::EoF => break,
                TokenMatchStatus::NoMatch => unreachable!("Comment is optional"),
            }
        }

        // Check for a stray `]`, a closing bracket without a matching `[`.
        if tok_help.check_if_next_char_match(']') {
            let unexpected_close_token = tok_help.get_next_match(
                &regex_list.token_close_bracket,
                "ERROR",
                Some("ERROR"),
                true,
                true,
            );
            match unexpected_close_token {
                TokenMatchStatus::Ok(result) => {
                    diagnostics.add_message(
                        // No extra template data needed
                        DMExtraInfo::new(result.get_range()),
                        "unexpected_end_bracket",
                    );
                    tok_help.add_node_to_tree(result, ROOT_ID);
                    // Look for comment again
                    continue;
                }
                TokenMatchStatus::OkWithPrefixFound(_prefix, _result) => {
                    unreachable!(
                        "Current Regex of `close_bracket` combined with \
                        `]` check above makes this unreachable."
                    )
                }
                TokenMatchStatus::EoF => break,
                TokenMatchStatus::NoMatch => unreachable!("Close bracket is optional"),
            }
        }

        // Check if EoF is reached, so we can stop checking early
        if tok_help.check_if_eof() {
            break;
        }

        // Tokenize a token: `[TOKEN_NAME:ARGS]`
        if tok_help.check_if_next_char_match('[') {
            token::tokenize_token(tok_help, regex_list, diagnostics)?;
        } else {
            // If this is reached we made a mistake somewhere in the code above.
            unreachable!(
                "Comment takes in everything except `[`, `]` and `EoF`, \
                next check was for `]` and `EoF`, So only `[` remains."
            );
        }

        // If the diagnostic message limit is reached, stop parsing the file further.
        if diagnostics.check_message_limit_reached() {
            crate::mark_rest_of_file_as_unchecked_tok(tok_help, diagnostics);
            break;
        }
    }
    TokenizerResult::Ok(())
}
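
// A minimal usage sketch (an assumption, not part of the original source):
// tokenize a small in-memory RAW snippet and check that the root node
// exists. The snippet content is made up, and `false` is passed so no
// diagnostic message templates are loaded from file in the test.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn tokenizes_a_small_raw_file() {
        let source = "creature_example\n\n[OBJECT:CREATURE]\n".to_owned();
        let (tree, _diagnostics) = tokenize_df_raw_file(source, false);
        // The root `raw_file` node is always created, even for odd input.
        assert!(tree.get_tsnode(ROOT_ID).is_some());
    }
}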