snipgrep 0.1.0

Code Documentation Made Simple
Documentation
//! This module provides functionality for parsing snippets from input files.
//!
//! It utilizes the `pest` and `scraper` crates for parsing and selecting HTML
//! elements respectively, along with `serde` for serialization and
//! deserialization.
use std::{
    collections::{BTreeMap, HashMap},
    fmt::Write,
};

use pest::{iterators::Pairs, Parser};
use pest_derive::Parser;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};

use crate::{
    db,
    errors::{ParseError, ParserResult},
};

/// Parser generated by `pest_derive` from the `snippet.pest` grammar file.
///
/// The derive also generates the `Rule` enum used throughout this module to
/// identify the kind of every captured pair.
#[derive(Parser)]
#[grammar = "snippet.pest"]
struct SnipParser;

/// A single snippet extracted from the input.
#[derive(Debug, Serialize, Deserialize)]
pub struct Snippet {
    /// ID of the snippet, taken from the `id` attribute. For example, the
    /// value is `snippet-id` for the following snippet:
    /// `<!-- <snipgrep id="snippet-id">-->`
    pub id: String,
    /// `true` when the `inject` attribute exists on the snippet tag, for
    /// example: `<!-- <snipgrep id="snippet-id" inject>-->`
    pub inject: bool,
    // Disabled field (kept for reference): would collect the `strip_prefix`
    // attribute when present on the snippet tag, for example:
    // <!-- <snipgrep id="snippet-id" strip_prefix="xxx">-->
    // pub strip_prefix: Option<String>,
    /// The opening tag of the snippet as captured by the parser.
    pub tag_open: String,
    /// The closing tag of the snippet as captured by the parser.
    pub tag_close: String,
    /// The snippet text split on `\n`, without its first and last line.
    pub snippet: Vec<String>,
}

/// A borrowed view over the text of a file to be parsed.
pub struct ParseFile<'a> {
    /// The raw text that [`ParseFile::parse`] and [`ParseFile::inject`] run
    /// the snippet grammar over.
    pub input: &'a str,
}

/// The result of running the injection logic over an input.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct InjectSummary {
    /// The content of the given input after the snipgrep injection logic was
    /// applied to it.
    pub content: String,
    /// One entry per snippet marked with `inject`, describing what happened
    /// to it.
    pub actions: Vec<InjectAction>,
}

/// The outcome recorded for a single snippet marked with `inject`.
#[derive(Debug, Serialize, Deserialize)]
pub enum InjectAction {
    /// The snippet was found and the input already contains exactly the text
    /// that the injection would produce.
    Equal { snippet_id: String },
    /// The snippet was found and its content was injected; `content` holds
    /// the injected snippet text.
    Injected { snippet_id: String, content: String },
    /// The input asks to inject a snippet whose id is not present in the
    /// provided snippets.
    NotFound { snippet_id: String },
}

impl<'a> ParseFile<'a> {
    /// Constructs a new [`ParseFile`] that borrows the provided input.
    ///
    /// Nothing is parsed here; the input is only stored for later calls to
    /// [`ParseFile::parse`] and [`ParseFile::inject`].
    #[must_use]
    pub const fn new(input: &'a str) -> Self {
        Self { input }
    }

    /// Runs the snippet grammar over the stored input and returns every
    /// snippet that was found, nested snippets included.
    ///
    /// # Errors
    ///
    /// Returns an error when the input does not match the grammar. A snippet
    /// whose attributes cannot be extracted is skipped and logged instead of
    /// failing the whole parse.
    pub fn parse(&self) -> ParserResult<'_, Vec<Snippet>> {
        let mut snippets = Vec::new();
        let parsed = SnipParser::parse(Rule::file, self.input)?;
        Self::collect_snippets(parsed, &mut snippets);
        Ok(snippets)
    }

    /// Rebuilds the stored input with the content of the given snippets
    /// injected into every snippet tag marked with `inject`.
    ///
    /// `snippets` maps a snippet id to the snippet data to inject. The
    /// returned [`InjectSummary`] holds the rebuilt content together with the
    /// action recorded for each `inject` snippet.
    ///
    /// # Errors
    ///
    /// Returns an error when the input does not match the grammar or when
    /// the rebuilt content cannot be written.
    pub fn inject(
        &self,
        snippets: &HashMap<String, &db::DbDataSnippet>,
    ) -> ParserResult<'_, InjectSummary> {
        let mut summary = InjectSummary::default();
        let parsed = SnipParser::parse(Rule::file, self.input)?;
        Self::inject_snippets(parsed, &mut summary, snippets)?;
        Ok(summary)
    }

    /// Returns the text of the opening tag of a snippet.
    ///
    /// `pair_children` are the inner pairs of a `snippet` pair. The first of
    /// them must be a `tag_open` pair, and the tag text is the second inner
    /// pair of that `tag_open`.
    ///
    /// # Panics
    ///
    /// Panics when `pair_children` is empty, when its first pair is not a
    /// `tag_open`, or when the `tag_open` has fewer than two inner pairs.
    /// The grammar is expected to guarantee all of these, so a panic here
    /// points at a grammar/parser mismatch rather than at bad user input.
    fn get_tag_open<'b>(pair_children: &'b Pairs<'_, Rule>) -> &'b str {
        let first = pair_children
            .clone()
            .next()
            .expect("assertion fails, snippet without tag open");

        let rule = first.as_rule();
        assert!(
            rule == Rule::tag_open,
            "Expected tag_open rule, found {:?}",
            rule
        );

        // Step over the first inner pair; the tag text is the one after it.
        let mut parts = first.into_inner();
        let _leading = parts.next();
        parts.next().expect("Expected at least two").as_str()
    }

    /// Returns the text of the comment opener that precedes the snippet tag.
    ///
    /// `pair_children` are the inner pairs of a `snippet` pair. The first of
    /// them must be a `tag_open` pair, and the comment opener is the first
    /// inner pair of that `tag_open`.
    ///
    /// # Panics
    ///
    /// Panics when `pair_children` is empty, when its first pair is not a
    /// `tag_open`, or when the `tag_open` has no inner pairs. The grammar is
    /// expected to guarantee all of these, so a panic here points at a
    /// grammar/parser mismatch rather than at bad user input.
    fn get_comment_tag<'b>(pair_children: &'b Pairs<'_, Rule>) -> &'b str {
        let first = pair_children
            .clone()
            .next()
            .expect("assertion fails, snippet without tag open");

        let rule = first.as_rule();
        assert!(
            rule == Rule::tag_open,
            "Expected tag_open rule, found {:?}",
            rule
        );

        first
            .into_inner()
            .next()
            .expect("Expected at least two")
            .as_str()
    }

    /// Returns the text of the third inner pair of the snippet's `tag_open`,
    /// if there is one.
    ///
    /// During injection this text is written right after the opening tag, so
    /// it is whatever closes the opening tag's own line in the input.
    ///
    /// # Panics
    ///
    /// Panics when `pair_children` is empty or when its first pair is not a
    /// `tag_open`.
    fn get_close_tag_of_tag_open<'b>(pair_children: &'b Pairs<'_, Rule>) -> Option<&'b str> {
        let first = pair_children
            .clone()
            .next()
            .expect("assertion fails, snippet without tag open");

        let rule = first.as_rule();
        assert!(
            rule == Rule::tag_open,
            "Expected tag_open rule, found {:?}",
            rule
        );

        let closing = first.into_inner().nth(2)?;
        Some(closing.as_str())
    }

    /// Returns the text of the first `tag_close` pair among the given pairs.
    ///
    /// `pair_children` are expected to be the inner pairs of a `snippet`
    /// pair; they are scanned in order until a `tag_close` is reached.
    ///
    /// # Panics
    ///
    /// Panics when none of the pairs is a `tag_close`. The grammar is
    /// expected to guarantee one, so a panic here points at a grammar/parser
    /// mismatch rather than at bad user input.
    fn get_tag_close(pair_children: Pairs<'_, Rule>) -> &'_ str {
        let mut remaining = pair_children;
        remaining
            .find(|child| child.as_rule() == Rule::tag_close)
            .expect("tag close not found")
            .as_str()
    }

    /// Reads the attributes of the `snipgrep` element found in the given tag.
    ///
    /// The tag is parsed as an HTML fragment and the attributes of the first
    /// `snipgrep` element are returned as a name-to-value map. For example,
    /// `<snipgrep id="quick_start">` yields `{"id": "quick_start"}`.
    ///
    /// # Errors
    ///
    /// This function returns an error in the following cases:
    ///
    /// * The `snipgrep` selector cannot be parsed.
    /// * The tag contains no `snipgrep` element.
    pub fn get_attributes(tag: &str) -> ParserResult<'_, BTreeMap<String, String>> {
        let selector_name = "snipgrep";
        let selector = Selector::parse(selector_name)?;

        let fragment = Html::parse_fragment(tag);
        let element = fragment
            .select(&selector)
            .next()
            .ok_or_else(|| ParseError::SelectorNotFound {
                selector: selector_name.to_string(),
                tag: tag.to_string(),
            })?;

        let mut attributes = BTreeMap::new();
        for (key, value) in element.value().attrs() {
            attributes.insert(key.to_string(), value.to_string());
        }
        Ok(attributes)
    }

    /// Walks the given pairs depth-first and appends every snippet it finds
    /// to `snippets`.
    ///
    /// A pair that is not a snippet is only descended into. For a snippet
    /// pair, its tags, attributes and body lines are recorded and its
    /// children are then searched for nested snippets. A snippet whose
    /// attributes cannot be extracted is logged and skipped together with
    /// everything nested inside it.
    ///
    /// # Panics
    ///
    /// Panics when a snippet has no `id` attribute, or when the helpers
    /// reading the opening/closing tag panic.
    fn collect_snippets(pairs: Pairs<'_, Rule>, snippets: &mut Vec<Snippet>) {
        for pair in pairs {
            // Not a snippet: nothing to record here, just keep descending.
            if pair.as_rule() != Rule::snippet {
                Self::collect_snippets(pair.into_inner(), snippets);
                continue;
            }

            let raw = pair.as_str();
            let children = pair.into_inner();
            let tag_open = Self::get_tag_open(&children);
            let tag_close = Self::get_tag_close(children.clone());

            tracing::debug!(tag_open, "found open tag");
            let attributes = match Self::get_attributes(tag_open) {
                Ok(attributes) => attributes,
                Err(err) => {
                    tracing::debug!(tag_open, err = %err, "could not extract attributes from the tag");
                    continue;
                }
            };

            tracing::debug!(
                tag_open,
                attributes = format!("{:#?}", attributes),
                "found attributes"
            );

            // The first line holds the opening tag and the last line the
            // closing tag; only what sits between them is the snippet body.
            let mut body: Vec<String> = raw.split('\n').skip(1).map(String::from).collect();
            body.pop();

            // The `id` attribute is mandatory in the parser configuration, so
            // a snippet captured without it is treated as a broken invariant.
            let id = attributes
                .get("id")
                .expect("assertion fails, snippet without element id")
                .clone();

            snippets.push(Snippet {
                id,
                inject: attributes.contains_key("inject"),
                // strip_prefix: attributes.get("strip_prefix").cloned(),
                tag_open: tag_open.to_owned(),
                tag_close: tag_close.to_owned(),
                snippet: body,
            });

            Self::collect_snippets(children, snippets);
        }
    }

    /// Walks the parsed pairs and rebuilds the file content in `summary`,
    /// replacing the body of every snippet marked with `inject` by the
    /// matching entry of `snippets`.
    ///
    /// Everything that is not replaced is copied to `summary.content`
    /// unchanged: leaf pairs outside of snippets, snippets without the
    /// `inject` attribute, snippets whose attributes cannot be extracted, and
    /// `inject` snippets whose id is missing from `snippets`. Copying the
    /// last two verbatim guarantees that the rebuilt content never drops a
    /// part of the input.
    ///
    /// One [`InjectAction`] is recorded per `inject` snippet:
    ///
    /// * `Equal` when the input already equals the generated text.
    /// * `Injected` when the generated text differs from the input.
    /// * `NotFound` when the id is not present in `snippets`.
    ///
    /// # Errors
    ///
    /// Returns an error when writing into `summary.content` fails.
    ///
    /// # Panics
    ///
    /// Panics when a snippet has no `id` attribute, or when the helpers
    /// reading the opening/closing tag panic.
    fn inject_snippets(
        pairs: Pairs<'_, Rule>,
        summary: &mut InjectSummary,
        snippets: &HashMap<String, &db::DbDataSnippet>,
    ) -> ParserResult<'a, ()> {
        for pair in pairs {
            let original = pair.as_str();

            if pair.as_rule() != Rule::snippet {
                let inner = pair.into_inner();
                if inner.len() == 0 {
                    // Leaf pair: its text is emitted as-is.
                    summary.content.write_str(original)?;
                } else {
                    // Container pair: its text is emitted through its children.
                    Self::inject_snippets(inner, summary, snippets)?;
                }
                continue;
            }

            let children = pair.into_inner();
            let tag_open = Self::get_tag_open(&children);
            let tag_close = Self::get_tag_close(children.clone());

            let attributes = match Self::get_attributes(tag_open) {
                Ok(attributes) => attributes,
                Err(err) => {
                    tracing::debug!(tag_open, err = %err, "could not extract attributes from the tag");
                    // Keep the unreadable snippet verbatim so the rebuilt
                    // content does not lose this part of the input.
                    summary.content.write_str(original)?;
                    continue;
                }
            };

            let snippet_id = attributes
                .get("id")
                .expect("assertion fails, snippet without element id");

            if !attributes.contains_key("inject") {
                summary.content.write_str(original)?;
                continue;
            }

            if let Some(snippet) = snippets.get(snippet_id) {
                let snippet_content = snippet
                    .get_snippet(attributes.get("strip_prefix"))
                    .join("\n");

                let comment_tag = Self::get_comment_tag(&children);
                let close_tag_of_tag_open = Self::get_close_tag_of_tag_open(&children);

                let inject_result = format!(
                    "{comment_tag}{tag_open}{}\n{snippet_content}\n{tag_close}",
                    close_tag_of_tag_open.unwrap_or_default()
                );

                summary.content.write_str(&inject_result)?;

                let action = if original == inject_result {
                    InjectAction::Equal {
                        snippet_id: snippet_id.to_string(),
                    }
                } else {
                    InjectAction::Injected {
                        snippet_id: snippet_id.to_string(),
                        content: snippet_content,
                    }
                };
                summary.actions.push(action);
            } else {
                // Nothing to inject: keep the placeholder verbatim so the
                // rebuilt content does not lose it, and report the miss.
                summary.content.write_str(original)?;
                summary.actions.push(InjectAction::NotFound {
                    snippet_id: snippet_id.to_string(),
                });
            }
        }
        Ok(())
    }
}