nom_html_parser 0.1.1

A parser, written with Nom, that converts an HTML string into an HTML tree structure
Documentation
use nom::*;

use nom::{
  IResult,
  bytes::complete::{tag, take_until},
  character::complete::{char},
  multi::{many_till},
  combinator::{opt},
  error::{context, VerboseError},
};

use crate::parser::tag::HtmlElement;
use crate::parser::errors::*;
use crate::parser::attributes::*;
use crate::parser::tag::html_tag;

/// Returns `true` when `s` is a character this parser accepts inside a tag name.
///
/// Accepted characters are every Unicode alphanumeric character plus
/// `_`, `-`, `@`, `{`, `}`, `'` and `:`.
pub fn is_html_value(s: char) -> bool {
    matches!(s, '_' | '-' | '@' | '{' | '}' | '\'' | ':') || s.is_alphanumeric()
}

/// Parses everything up to and including an opening tag of the form `<name attr... >`.
///
/// On success the output tuple contains, in order:
///
/// * the text that precedes the `<`,
/// * the tag name with one leading `@` removed when present (the empty string
///   when no name characters follow the `<`),
/// * the attributes, with each key passed through `get_denominator` and each
///   value converted with `to_string`,
/// * the `>` character that closes the opening tag.
///
/// Errors raised by the underlying parsers are propagated unchanged.
pub fn space_and_opening_tag<'a>(input: &'a str) -> IResult<&'a str, (&'a str, &'a str, TagAttributes, char), VerboseError<&str>> {
    let (input, (space_before, raw_tag)) = do_parse!(
        input,
        space: take_until!("<") >>
        char!('<') >>
        tag: opt!(take_while!(is_html_value)) >>
        ((space, tag.unwrap_or("")))
    )?;
    // `is_html_value` lets `@` through, so the captured name may still carry
    // the leading `@` marker; drop exactly one of them.
    let tag = raw_tag.strip_prefix('@').unwrap_or(raw_tag);
    // Parse attributes until the `>` that closes the opening tag.
    let (input, (raw_attributes, close_tag)) = many_till(attribute, char('>'))(input)?;
    let attributes = raw_attributes
        .into_iter()
        .map(|attr| (get_denominator(attr.0), attr.1.to_string()))
        .collect();
    Ok((input, (space_before, tag, attributes, close_tag)))
}

/// A single node produced while parsing the body of a tag.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
    /// Plain text content, stored as an owned string.
    Text(String),
    /// A tag parsed by `html_tag`; `content_or_child` produces this for a line
    /// whose first non-whitespace character is `<` (and is not followed by `@`).
    Element(HtmlElement),
    /// A tag parsed by `html_tag`; `content_or_child` produces this for a line
    /// whose first non-whitespace characters are `<@`.
    Component(HtmlElement),
}

/// Builds a parser that captures the text before `closing_tag` and then
/// consumes the closing tag itself.
///
/// The returned parser yields `(content, end)`:
///
/// * `content` is the input preceding the first occurrence of `closing_tag`,
///   or the empty string when `take_until` does not match;
/// * `end` is the matched closing tag.
///
/// If the closing tag does not follow the captured content, the parser fails
/// under the `CLOSING_TAG_ERROR` context.
pub fn parse_inner_content(closing_tag: String) -> impl Fn(&str) -> IResult<&str, (&str, &str), nom::error::VerboseError<&str>> {
    move |input| -> IResult<&str, (&str, &str), nom::error::VerboseError<&str>> {
        // Borrow the captured tag once instead of cloning it on every call.
        let closing = closing_tag.as_str();
        let (input, content) = context(TAG_CONTENT_ERROR, opt(take_until(closing)))(input)?;
        let (input, end) = context(CLOSING_TAG_ERROR, tag(closing))(input)?;
        Ok((input, (content.unwrap_or(""), end)))
    }
}

/// Parses one tag with `html_tag` and wraps the result as `Node::Element`.
fn parse_child(input: &str) -> IResult<&str, Node, VerboseError<&str>> {
    html_tag()(input).map(|(remaining, element)| (remaining, Node::Element(element)))
}

/// Parses one tag with `html_tag` and wraps the result as `Node::Component`.
fn parse_component(input: &str) -> IResult<&str, Node, VerboseError<&str>> {
    html_tag()(input).map(|(remaining, element)| (remaining, Node::Component(element)))
}

/// Parses the next piece of a tag body into at most one [`Node`].
///
/// `input` must start with `\n`; otherwise the parser fails under the
/// `NEW_LINE_ERROR` context. Once that newline is consumed, the first line of
/// what remains selects the branch:
///
/// * nothing remains — returns `None` together with the empty remainder;
/// * the line's first non-whitespace characters are `<@` — the remainder is
///   handed to `parse_component`;
/// * the line's first non-whitespace character is `<` — the remainder is
///   handed to `parse_child`;
/// * anything else — the *entire* remainder (not only that line) becomes a
///   single `Node::Text` and the parser returns an empty remainder.
///
/// Errors from `parse_component` / `parse_child` are propagated unchanged.
pub fn content_or_child(input: &str) -> IResult<&str, Option<Node>, nom::error::VerboseError<&str>> {
    let (input, _) = context(NEW_LINE_ERROR, char('\n'))(input)?;

    // Look ahead at the line following the newline without consuming it; the
    // sub-parsers below receive the untrimmed remainder.
    let first_line = match input.lines().next() {
        Some(line) => line.trim_start(),
        None => return Ok((input, None)),
    };

    if first_line.starts_with("<@") {
        let (input, component) = parse_component(input)?;
        Ok((input, Some(component)))
    } else if first_line.starts_with('<') {
        let (input, child) = parse_child(input)?;
        Ok((input, Some(child)))
    } else {
        // `rest` always succeeds and consumes everything that is left, so the
        // whole remainder is captured as text in a single step.
        let (input, text) = context(TAG_CONTENT_ERROR, nom::combinator::rest)(input)?;
        Ok((input, Some(Node::Text(text.to_string()))))
    }
}


/// Builds a parser that applies `content_or_child` repeatedly until the input
/// is empty.
///
/// Every `Some(node)` result is collected in order and `None` results are
/// skipped. The first error returned by `content_or_child` stops the loop and
/// is propagated. On success the remaining input is empty.
pub fn consume_till_end() -> impl Fn(&str) -> IResult<&str, Vec<Node>, nom::error::VerboseError<&str>> {
    move |input| {
        let mut nodes = Vec::new();
        let mut remaining = input;

        while !remaining.is_empty() {
            let (next, node) = content_or_child(remaining)?;
            // `Option` is an iterator of zero or one items: `None` adds nothing.
            nodes.extend(node);
            remaining = next;
        }

        Ok((remaining, nodes))
    }
}

/// Parses `input` into a list of nodes by delegating to the parser built by
/// `consume_till_end`.
pub fn content_and_childs(input: &str) -> IResult<&str, Vec<Node>, nom::error::VerboseError<&str>> {
    consume_till_end()(input)
}