Capricorn
Parse HTML according to a configuration.
Capricorn is an HTML parsing library that supports recursion and a custom execution order.
Default execution order
vec![String::from("selects"),
String::from("each"),
String::from("select_params"),
String::from("nodes"),
String::from("has"),
String::from("contains")];
selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html);
selects > select_params > selects > ... text_attr_html > (text or attr or html);
selects > nodes > has > contains > text_attr_html > (text or attr or html);
Supported features:
| Capricorn | support | example | val type |
|---|---|---|---|
| selects element | ✔ | field_name: selects: - element_name | String |
| selects class | ✔ | field_name: selects: - .class_name | String |
| selects class element | ✔ | field_name: selects: - .class_name - element_name | String |
| first | ✔ | field_name: selects: - element_name nodes: first: true | String |
| last | ✔ | field_name: selects: - element_name nodes: last: true | String |
| eq | ✔ | field_name: selects: - element_name nodes: eq: 0 | String |
| parent | ✔ | field_name: selects: - element_name nodes: parent: true | String |
| children | ✔ | field_name: selects: - element_name nodes: children: true | String |
| prev_sibling | ✔ | field_name: selects: - element_name nodes: prev_sibling: true | String |
| next_sibling | ✔ | field_name: selects: - element_name nodes: next_sibling: true | String |
| has_class | ✔ | field_name: selects: - element_name has: class: class_name | String |
| has_attr | ✔ | field_name: selects: - element_name has: attr: attr_name | String |
| each one | ✔ | field_name: selects: - element_name each: one: selects: - .class_name ... | String |
| each all | ✔ | field_name: selects: - element_name each: all: selects: - .class_name ... | Array |
| each fields | ✔ | field_name: selects: - element_name each: fields: field_name: selects: - .class_name ... field_name1: selects: - .class_name ... | Map |
| select_params | ✔ | field_name: selects: - element_name select_params: selects: - .class_name ... | ... |
| text | ✔ | field_name: selects: - element_name text_attr_html: text: true | String |
| attr | ✔ | field_name: selects: - element_name text_attr_html: attr: true | String |
| html | ✔ | field_name: selects: - element_name text_attr_html: html: true | String |
| text contains | ✔ | field_name: selects: - element_name contains: contains: text: - test | String |
| text not contains | ✔ | field_name: selects: - element_name contains: not_contains: text: - test | String |
| html contains | ✔ | field_name: selects: - element_name contains: contains: html: - test | String |
| html not contains | ✔ | field_name: selects: - element_name contains: not_contains: html: - test | String |
| exec order | ✔ | field_name: exec_order: - selects - has - nodes selects: - element_name has: class: class_name nodes: first: true | String |
| data format splits | ✔ | field_name: selects: - element_name data_format: splits: - { key: str } | Array |
| data format splits | ✔ | field_name: selects: - element_name data_format: splits: - { key: str, index: 0 } | String |
| data format replaces | ✔ | field_name: selects: - element_name data_format: replaces: - str | String |
| data format deletes | ✔ | field_name: selects: - element_name data_format: deletes: - str | String |
| data format find | ✔ | field_name: selects: - element_name data_format: find: - regex | String |
| data format find_iter | ✔ | field_name: selects: - element_name data_format: find_iter: - regex | Array |
| Multi-version regex matching err | ✔ | regexes_match_parse_html: - regex: regex version: 1 err: err_msg | Err |
| Multi-version regex matching fields | ✔ | regexes_match_parse_html: - regex: regex version: 1 fields: field_name: selects: ... field_name: selects: ... | Map |
Parse HTML code, more...
let yml = read_file("./test_html/test.yml").unwrap();
let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = parse::parse_html(&params, &html);
Multi-version regex matching to parse HTML code, more...
let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap();
let v: match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = v.regexes_match_parse_html(html)?;