use crate::bindings::*;
use crate::c_helpers::*;
use crate::tree::*;
use std::ffi::{CStr, CString};
use std::fmt;
use std::ptr;
use std::str;
/// Parser option flags handed to the XML reader functions.
///
/// Each variant is a single bit so that several options can be combined into
/// one integer. The numeric values mirror the identically named entries of
/// libxml2's `xmlParserOption` enumeration.
enum XmlParserOption {
    /// Bit 0 (`XML_PARSE_RECOVER` in libxml2).
    Recover = 1 << 0,
    /// Bit 5 (`XML_PARSE_NOERROR` in libxml2).
    Noerror = 1 << 5,
    /// Bit 6 (`XML_PARSE_NOWARNING` in libxml2).
    Nowarning = 1 << 6,
}
/// Parser option flags handed to the HTML reader functions.
///
/// Each variant is a single bit so that several options can be combined into
/// one integer. The numeric values mirror the identically named entries of
/// libxml2's `htmlParserOption` enumeration.
enum HtmlParserOption {
    /// Bit 0 (`HTML_PARSE_RECOVER` in libxml2).
    Recover = 1 << 0,
    /// Bit 5 (`HTML_PARSE_NOERROR` in libxml2).
    Noerror = 1 << 5,
    /// Bit 6 (`HTML_PARSE_NOWARNING` in libxml2).
    Nowarning = 1 << 6,
}
/// Error returned when a parse operation cannot produce a document.
pub enum XmlParseError {
    /// No document could be produced: a null pointer was obtained where a
    /// parsed document was expected.
    GotNullPointer,
}

impl fmt::Debug for XmlParseError {
    /// Writes the human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            XmlParseError::GotNullPointer => write!(f, "Got a Null pointer"),
        }
    }
}

impl fmt::Display for XmlParseError {
    /// Writes the same message as the `Debug` implementation, so the error
    /// reads identically whether it is printed with `{}` or `{:?}`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            XmlParseError::GotNullPointer => write!(f, "Got a Null pointer"),
        }
    }
}

// Lets callers box the error or propagate it through `?` into
// `Box<dyn std::error::Error>`.
impl std::error::Error for XmlParseError {}
/// Input format a parser is configured for.
///
/// The enum is a plain tag with no payload, so it is cheap to copy, compare
/// and print.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseFormat {
    /// Treat input as XML.
    XML,
    /// Treat input as HTML.
    HTML,
}
/// Parser front end whose behavior is selected by its [`ParseFormat`].
pub struct Parser {
    /// Format that this parser's methods dispatch on.
    pub format: ParseFormat,
}
impl Default for Parser {
    /// Builds a parser configured for the XML format.
    fn default() -> Self {
        let format = ParseFormat::XML;
        Self { format }
    }
}
impl Parser {
pub fn default_html() -> Self {
Parser {
format: ParseFormat::HTML,
}
}
pub fn parse_file(&self, filename: &str) -> Result<Document, XmlParseError> {
let c_filename = CString::new(filename).unwrap();
let c_utf8 = CString::new("utf-8").unwrap();
unsafe {
xmlKeepBlanksDefault(1);
}
match self.format {
ParseFormat::XML => {
let options: i32 = XmlParserOption::Recover as i32
+ XmlParserOption::Noerror as i32
+ XmlParserOption::Nowarning as i32;
unsafe {
let doc_ptr = xmlReadFile(c_filename.as_ptr(), c_utf8.as_ptr(), options);
if doc_ptr.is_null() {
Err(XmlParseError::GotNullPointer)
} else {
Ok(Document::new_ptr(doc_ptr))
}
}
}
ParseFormat::HTML => {
let options: i32 = HtmlParserOption::Recover as i32
+ HtmlParserOption::Noerror as i32
+ HtmlParserOption::Nowarning as i32;
unsafe {
let doc_ptr = htmlReadFile(c_filename.as_ptr(), c_utf8.as_ptr(), options);
if doc_ptr.is_null() {
Err(XmlParseError::GotNullPointer)
} else {
Ok(Document::new_ptr(doc_ptr))
}
}
}
}
}
pub fn parse_string(&self, input_string: &str) -> Result<Document, XmlParseError> {
let c_string = CString::new(input_string).unwrap();
let c_utf8 = CString::new("utf-8").unwrap();
let c_url = CString::new("").unwrap();
match self.format {
ParseFormat::XML => unsafe {
let options: i32 = XmlParserOption::Recover as i32
+ XmlParserOption::Noerror as i32
+ XmlParserOption::Nowarning as i32;
let docptr = xmlReadDoc(
c_string.as_bytes().as_ptr(),
c_url.as_ptr(),
c_utf8.as_ptr(),
options,
);
if docptr.is_null() {
Err(XmlParseError::GotNullPointer)
} else {
Ok(Document::new_ptr(docptr))
}
},
ParseFormat::HTML => unsafe {
let options: i32 = HtmlParserOption::Recover as i32
+ HtmlParserOption::Noerror as i32
+ HtmlParserOption::Nowarning as i32;
let docptr = htmlReadDoc(
c_string.as_bytes().as_ptr(),
c_url.as_ptr(),
c_utf8.as_ptr(),
options,
);
if docptr.is_null() {
Err(XmlParseError::GotNullPointer)
} else {
Ok(Document::new_ptr(docptr))
}
},
}
}
pub fn is_well_formed_html(&self, input_string: &str) -> bool {
if input_string.is_empty() {
return false;
}
let c_string = CString::new(input_string).unwrap();
let c_utf8 = CString::new("utf-8").unwrap();
match self.format {
ParseFormat::XML => false, ParseFormat::HTML => unsafe {
let ctxt = htmlNewParserCtxt();
setWellFormednessHandler(ctxt);
let docptr = htmlCtxtReadDoc(
ctxt,
c_string.as_bytes().as_ptr(),
ptr::null_mut(),
c_utf8.as_ptr(),
10_596,
); let well_formed_final = if htmlWellFormed(ctxt) {
if !docptr.is_null() {
let node_ptr = xmlDocGetRootElement(docptr);
let name_ptr = xmlNodeGetName(node_ptr);
if name_ptr.is_null() {
false
}
else {
let c_root_name = CStr::from_ptr(name_ptr);
let root_name = str::from_utf8(c_root_name.to_bytes()).unwrap().to_owned();
root_name == "html"
}
} else {
false
}
} else {
false
};
if !ctxt.is_null() {
htmlFreeParserCtxt(ctxt);
}
if !docptr.is_null() {
xmlFreeDoc(docptr);
}
well_formed_final
},
}
}
}