capricorn 0.1.93

Parse html according to configuration
Documentation

Capricorn

Parse HTML according to a configuration.

Capricorn is an HTML parsing library that supports recursion and a custom execution order.

Version info Downloads docs example branch parameter dependency status

Default execution order

vec![String::from("selects"),
    String::from("each"),
    String::from("select_params"),
    String::from("nodes"),
    String::from("has"),
    String::from("contains")];
    
selects > each > (one or all or fields) > ... text_attr_html > (text or attr or html);
selects > select_params > selects > ... text_attr_html > (text or attr or html);
selects > nodes > has > contains > text_attr_html > (text or attr or html);

Support:

Columns of each row below: Capricorn support | example | val type
selects element field_name:   selects:       - element_name String
selects class field_name:   selects:       - .class_name String
selects class element field_name:   selects:       - .class_name       - element_name String
first field_name:   selects:       - element_name   nodes:       first: true String
last field_name:   selects:       - element_name   nodes:       last: true String
eq field_name:   selects:       - element_name   nodes:       eq: 0 String
parent field_name:   selects:       - element_name   nodes:       parent: true String
children field_name:   selects:       - element_name   nodes:       children: true String
prev_sibling field_name:   selects:       - element_name   nodes:       prev_sibling: true String
next_sibling field_name:   selects:       - element_name   nodes:       next_sibling: true String
has_class field_name:   selects:       - element_name   has:       class: class_name String
has_attr field_name:   selects:       - element_name   has:       attr: attr_name String
each one field_name:   selects:       - element_name   each:       one:           selects:              - .class_name          ... String
each all field_name:   selects:       - element_name   each:       all:           selects:              - .class_name          ... Array
each fields field_name:   selects:       - element_name   each:       fields:         field_name:           selects:              - .class_name          ...         field_name1:           selects:              - .class_name          ... Map
select_params field_name:   selects:       - element_name   select_params:       selects:          - .class_name      ... ...
text field_name:   selects:       - element_name   text_attr_html:       text: true String
attr field_name:   selects:       - element_name   text_attr_html:       attr: true String
html field_name:   selects:       - element_name   text_attr_html:       html: true String
text contains field_name:   selects:       - element_name   contains:       contains:           text:               - test String
text not contains field_name:   selects:       - element_name   contains:       not_contains:           text:               - test String
html contains field_name:   selects:       - element_name   contains:       contains:           html:               - test String
html not contains field_name:   selects:       - element_name   contains:       not_contains:           html:               - test String
exec order field_name:   exec_order:       - selects       - has       - nodes   selects:       - element_name   has:       class: class_name   nodes:       first: true String
data format splits field_name:   selects:       - element_name   data_format:       splits:           - { key: str } Array
data format splits field_name:   selects:       - element_name   data_format:       splits:           - { key: str, index: 0 } String
data format replaces field_name:   selects:       - element_name   data_format:       replaces:           - str String
data format deletes field_name:   selects:       - element_name   data_format:       deletes:           - str String
data format find field_name:   selects:       - element_name   data_format:       find:         - regex String
data format find_iter field_name:   selects:       - element_name   data_format:       find_iter:         - regex Array
Multi-version regular-expression matching err regexes_match_parse_html:     - regex: regex       version: 1       err: err_msg Err
Multi-version regular-expression matching fields regexes_match_parse_html:     - regex: regex       version: 1       fields:         field_name:           selects:           ...         field_name:           selects:           ... Map

Parsing HTML code (more...):

let yml = read_file("./test_html/test.yml").unwrap();
let params: parse::HashMapSelectParams = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r = parse::parse_html(&params, &html);

Parsing HTML code with multi-version regular-expression matching (more...):

let yml = read_file("./test_html/regexes_match_parse_html.yml").unwrap();
let v:  match_html::MatchHtmlVec = serde_yaml::from_str(&yml).unwrap();
let html = read_file("./test_html/test.html").unwrap();
let r =  v.regexes_match_parse_html(html)?;