requests2 0.1.62

A simple HTTP client written in Rust
Documentation
//! ## Parser
//! Uses a structure similar to Python's bs4 (BeautifulSoup).
//! --snip--

use std::collections::HashMap;

use crate::value::Value;
use regex::{Captures, Regex};
use select::{
    document::Document,
    node::Node,
    predicate::{Any, Attr, Name, Predicate},
};

/// HTML query helper that resolves a small set of CSS-like selectors against
/// a borrowed [`Document`].
pub struct Parser<'a> {
    /// Document that every selector is evaluated against.
    document: &'a Document,
    /// Selector handlers, keyed by the source text of the regular expression
    /// that recognises the selector shape. A handler receives the capture
    /// groups of its expression and returns the nodes it selects.
    css_process: HashMap<&'a str, Box<dyn Fn(Captures) -> Vec<Node<'a>> + 'a>>,
}

impl<'a> Parser<'a> {
    pub fn new(document: &'a Document) -> Self {
        Parser {
            document,
            css_process: HashMap::new(),
        }
    }

    /// Registers the built-in CSS selector handlers.
    ///
    /// Each handler is stored under the regular expression that recognises
    /// its selector shape, and is called with that expression's capture
    /// groups. Supported shapes:
    ///
    /// - `.class` — elements carrying the class
    /// - `#id` — elements with the given `id`
    /// - `.a.b` — elements carrying both classes
    /// - `tag` — elements with the given tag name
    /// - `[attr=value]` — elements whose attribute equals `value` (the
    ///   pattern is unanchored, so text outside the brackets is ignored)
    /// - `tag.class` — elements of that tag carrying the class
    /// - `outer inner` — `inner` elements that descend from `outer`
    /// - `[attr~pattern]` — elements whose attribute value matches `pattern`
    ///   interpreted as a regular expression
    /// - `tag.class inner` / `tag#id inner` — the first `inner` descendant of
    ///   every matching parent; parents without such a descendant are skipped
    pub fn defined_css_processes(&mut self) {
        // True when `class` is one of the whitespace-separated tokens of the
        // node's `class` attribute. Token comparison (rather than substring
        // search) keeps `.foo` from matching `class="foobar"`.
        fn has_class(node: &Node, class: &str) -> bool {
            node.attr("class").map_or(false, |value| {
                value.split_whitespace().any(|token| token == class)
            })
        }

        // Copy the `&'a Document` out of `self` so each handler owns its own
        // copy of the reference instead of borrowing `self`.
        let document = self.document;

        // `.class`
        self.css_process.insert(
            r"^\.([\w-]+)$",
            Box::new(move |caps| {
                document
                    .find(Any)
                    .filter(|n| has_class(n, &caps[1]))
                    .collect::<Vec<Node>>()
            }),
        );

        // `#id`
        self.css_process.insert(
            r"^#([\w-]+)$",
            Box::new(move |caps| {
                document
                    .find(Attr("id", &caps[1]))
                    .collect::<Vec<Node>>()
            }),
        );

        // `.a.b` — both names must be present as whole class tokens.
        self.css_process.insert(
            r"^\.([\w-]+)\.([\w-]+)$",
            Box::new(move |caps| {
                document
                    .find(Any)
                    .filter(|n| has_class(n, &caps[1]) && has_class(n, &caps[2]))
                    .collect::<Vec<Node>>()
            }),
        );

        // `tag`
        self.css_process.insert(
            r"^([\w-]+)$",
            Box::new(move |caps| document.find(Name(&caps[1])).collect::<Vec<Node>>()),
        );

        // `[attr=value]`
        self.css_process.insert(
            r"\[([\w-]+)=([\w-]+)\]",
            Box::new(move |caps| {
                document
                    .find(Attr(&caps[1], &caps[2]))
                    .collect::<Vec<Node>>()
            }),
        );

        // `tag.class`
        self.css_process.insert(
            r"^([\w-]+)\.([\w-]+)",
            Box::new(move |caps| {
                document
                    .find(Name(&caps[1]))
                    .filter(|n| has_class(n, &caps[2]))
                    .collect::<Vec<Node>>()
            }),
        );

        // `outer inner`
        self.css_process.insert(
            r"^([\w-]+)\s+([\w-]+)$",
            Box::new(move |caps| {
                document
                    .find(Name(&caps[1]).descendant(Name(&caps[2])))
                    .collect::<Vec<Node>>()
            }),
        );

        // `[attr~pattern]`
        self.css_process.insert(
            r"\[([\w-]+)~([\w-]+)\]",
            Box::new(move |caps| {
                // The capture only admits word characters and hyphens, none
                // of which are regex metacharacters outside a character class.
                let reg = Regex::new(&caps[2])
                    .expect("word characters and hyphens always form a valid regex");

                document
                    .find(Any)
                    .filter(|n| n.attr(&caps[1]).map_or(false, |v| reg.is_match(v)))
                    .collect::<Vec<Node>>()
            }),
        );

        // `tag.class inner` — `filter_map` skips parents that have no `inner`
        // descendant instead of panicking on them.
        self.css_process.insert(
            r"^([\w-]+)\.([\w-]+)\s+([\w-]+)",
            Box::new(move |caps| {
                document
                    .find(Name(&caps[1]))
                    .filter(|n| has_class(n, &caps[2]))
                    .filter_map(|n| n.find(Name(&caps[3])).next())
                    .collect::<Vec<Node>>()
            }),
        );

        // `tag#id inner` — same skipping behaviour as above.
        self.css_process.insert(
            r"^([\w-]+)#([\w-]+)\s+([\w-]+)",
            Box::new(move |caps| {
                document
                    .find(Name(&caps[1]))
                    .filter(|n| n.attr("id").map_or(false, |v| v == &caps[2]))
                    .filter_map(|n| n.find(Name(&caps[3])).next())
                    .collect::<Vec<Node>>()
            }),
        );
    }

    /// Resolves `selector` with the registered handlers and returns the
    /// selected nodes (empty when no registered pattern matches).
    ///
    /// Several registered patterns can match the same selector, and the
    /// handler table is a `HashMap` with unspecified iteration order. To keep
    /// the result deterministic, exactly one handler is run: the one whose
    /// pattern matched the longest part of `selector`, with ties broken by
    /// comparing the pattern text.
    ///
    /// # Panics
    ///
    /// Panics if a registered key is not a valid regular expression.
    pub fn _select(&self, selector: &str) -> Vec<Node> {
        // Ranking key of the best match so far, kept separately from the
        // (non-`Copy`) captures so it can be compared cheaply.
        let mut best_key: Option<(usize, &str)> = None;
        let mut best_hit = None;

        for (pattern, handler) in &self.css_process {
            let regex = Regex::new(pattern).expect("registered selector pattern must be a valid regex");
            if let Some(caps) = regex.captures(selector) {
                let key = (caps[0].len(), *pattern);
                if best_key.map_or(true, |current| key > current) {
                    best_key = Some(key);
                    best_hit = Some((caps, handler));
                }
            }
        }

        match best_hit {
            Some((caps, handler)) => handler(caps),
            None => Vec::new(),
        }
    }

    /// Returns the first node selected by `selector`.
    ///
    /// # Panics
    ///
    /// Panics when `selector` selects no node. Use [`Parser::select_all`] to
    /// get a possibly empty list instead.
    pub fn select(&self, selector: &str) -> Node {
        self._select(selector)
            .into_iter()
            .next()
            .unwrap_or_else(|| panic!("selector `{}` did not match any node", selector))
    }

    pub fn select_all(&self, selector: &str) -> Vec<Node> {
        self._select(selector)
    }

    /// Selects nodes with `selector`, keeps those accepted by `filter`, and
    /// extracts one string per remaining node into a `Value::LIST`.
    ///
    /// When `attr` is `"text"` the node's text is used with all whitespace
    /// removed (the whitespace-separated pieces are concatenated without a
    /// separator). Otherwise the value of the attribute named `attr` is used,
    /// or an empty string when the node lacks that attribute.
    fn _find<F>(&self, selector: &str, filter: F, attr: &str) -> Value
    where
        F: FnMut(&select::node::Node<'_>) -> bool,
    {
        // Decide once which extraction applies instead of per node.
        let want_text = attr == "text";

        let data = self
            ._select(selector)
            .into_iter()
            .filter(filter)
            .map(|node| {
                if want_text {
                    node.text().split_whitespace().collect::<String>()
                } else {
                    node.attr(attr).unwrap_or("").to_string()
                }
            })
            .collect();

        Value::LIST(data)
    }

    pub fn find_all<F>(&self, selector: &str, filter: F, attr: &str) -> Value
    where
        F: FnMut(&select::node::Node<'_>) -> bool,
    {
        self._find(selector, filter, attr)
    }

    /// Extracts `attr` (or the text, when `attr` is `"text"`) from the first
    /// node that `selector` selects and `filter` accepts, as a `Value::STR`.
    ///
    /// Returns an empty `Value::STR` when no node qualifies, rather than
    /// panicking on the empty result list.
    pub fn find<F>(&self, selector: &str, filter: F, attr: &str) -> Value
    where
        F: FnMut(&select::node::Node<'_>) -> bool,
    {
        match self._find(selector, filter, attr) {
            Value::LIST(items) => Value::STR(items.into_iter().next().unwrap_or_default()),
            _ => Value::STR(String::new()),
        }
    }
}